From b03055099affdc2579e2b4837bfaab8cff62c125 Mon Sep 17 00:00:00 2001 From: delangel Date: Fri, 11 Mar 2011 17:37:41 +0000 Subject: [PATCH] a) Changed the way we classify and log indel events (e.g. in the IndelClasses table inside the IndelStatistics VE module). Made names clearer, and split the logging of event length from the logging of the number of repetitions of the event. b) Added an experimental annotation to log the indel type string inside the INFO field, just for debugging/temporary analysis purposes (will consider making it standard if it proves useful). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5424 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/annotator/IndelType.java | 48 +++++++++++++++++ .../sting/utils/IndelUtils.java | 52 +++++++++++++------ 2 files changed, 84 insertions(+), 16 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java new file mode 100755 index 000000000..b44b561c7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeaderLineType; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.IndelUtils; + +import java.util.*; + +/** + * Created by IntelliJ IDEA. 
+ * User: delangel + * Date: Mar 11, 2011 + * Time: 11:47:33 AM + * To change this template use File | Settings | File Templates. + */ +public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation { + + public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + + int run; + if ( vc.isIndel() && vc.isBiallelic() ) { + String type=""; + ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); + for (int k : inds) { + type = type+ IndelUtils.getIndelClassificationName(k)+"."; + } + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%s", type)); + return map; + + } else { + return null; + } + + } + + public List getKeyNames() { return Arrays.asList("IndelType"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("IndelType", 1, VCFHeaderLineType.String, "Indel type description")); } + +} diff --git a/java/src/org/broadinstitute/sting/utils/IndelUtils.java b/java/src/org/broadinstitute/sting/utils/IndelUtils.java index e5b7bdc41..dda8c2480 100755 --- a/java/src/org/broadinstitute/sting/utils/IndelUtils.java +++ b/java/src/org/broadinstitute/sting/utils/IndelUtils.java @@ -20,7 +20,7 @@ public class IndelUtils { static { - COLUMN_KEYS= new String[41]; + COLUMN_KEYS= new String[51]; COLUMN_KEYS[0] = "Novel_A"; COLUMN_KEYS[1] = "Novel_C"; COLUMN_KEYS[2] = "Novel_G"; @@ -51,17 +51,27 @@ public class IndelUtils { COLUMN_KEYS[27] = "RepeatExpansion_TA"; COLUMN_KEYS[28] = "RepeatExpansion_TC"; COLUMN_KEYS[29] = "RepeatExpansion_TG"; - COLUMN_KEYS[30] = "RepeatExpansion_1"; - COLUMN_KEYS[31] = "RepeatExpansion_2"; - COLUMN_KEYS[32] = "RepeatExpansion_3"; - COLUMN_KEYS[33] = "RepeatExpansion_4"; - COLUMN_KEYS[34] = "RepeatExpansion_5"; - COLUMN_KEYS[35] = "RepeatExpansion_6"; - COLUMN_KEYS[36] = "RepeatExpansion_7"; - COLUMN_KEYS[37] = "RepeatExpansion_8"; - COLUMN_KEYS[38] = "RepeatExpansion_9"; - COLUMN_KEYS[39] = "RepeatExpansion_10orMore"; - 
COLUMN_KEYS[40] = "Other"; + COLUMN_KEYS[30] = "EventLength_1"; + COLUMN_KEYS[31] = "EventLength_2"; + COLUMN_KEYS[32] = "EventLength_3"; + COLUMN_KEYS[33] = "EventLength_4"; + COLUMN_KEYS[34] = "EventLength_5"; + COLUMN_KEYS[35] = "EventLength_6"; + COLUMN_KEYS[36] = "EventLength_7"; + COLUMN_KEYS[37] = "EventLength_8"; + COLUMN_KEYS[38] = "EventLength_9"; + COLUMN_KEYS[39] = "EventLength_10orMore"; + COLUMN_KEYS[40] = "NumRepetitions_1"; + COLUMN_KEYS[41] = "NumRepetitions_2"; + COLUMN_KEYS[42] = "NumRepetitions_3"; + COLUMN_KEYS[43] = "NumRepetitions_4"; + COLUMN_KEYS[44] = "NumRepetitions_5"; + COLUMN_KEYS[45] = "NumRepetitions_6"; + COLUMN_KEYS[46] = "NumRepetitions_7"; + COLUMN_KEYS[47] = "NumRepetitions_8"; + COLUMN_KEYS[48] = "NumRepetitions_9"; + COLUMN_KEYS[49] = "NumRepetitions_10orMore"; + COLUMN_KEYS[50] = "Other"; } @@ -75,7 +85,9 @@ public class IndelUtils { private static final int STOP_IND_FOR_REPEAT_EXPANSION_2 = 29; private static final int START_IND_FOR_REPEAT_EXPANSION_COUNTS = 30; private static final int STOP_IND_FOR_REPEAT_EXPANSION_COUNTS = 39; - private static final int IND_FOR_OTHER_EVENT = 40; + private static final int START_IND_FOR_NUM_REPETITION_COUNTS = 40; + private static final int STOP_IND_FOR_NUM_REPETITION_COUNTS = 49; + private static final int IND_FOR_OTHER_EVENT = 50; private static final int START_IND_NOVEL_PER_BASE = 0; private static final int STOP_IND_NOVEL_PER_BASE = 3; @@ -182,11 +194,18 @@ public class IndelUtils { } } else { - int ind = START_IND_FOR_REPEAT_EXPANSION_COUNTS + (numRepetitions-1); - if (ind > STOP_IND_FOR_REPEAT_EXPANSION_COUNTS) - ind = STOP_IND_FOR_REPEAT_EXPANSION_COUNTS; + // log number of repetition counts + int ind = START_IND_FOR_NUM_REPETITION_COUNTS + (numRepetitions-1); + if (ind > STOP_IND_FOR_NUM_REPETITION_COUNTS) + ind = STOP_IND_FOR_NUM_REPETITION_COUNTS; inds.add(ind); + ind = START_IND_FOR_REPEAT_EXPANSION_COUNTS + (eventLength - 1); + if (ind > STOP_IND_FOR_REPEAT_EXPANSION_COUNTS) + 
ind = STOP_IND_FOR_REPEAT_EXPANSION_COUNTS; + inds.add(ind); + + // log event length if (eventLength<=2) { // for single or dinucleotide indels, we further log the base in which they occurred String keyStr = "RepeatExpansion_" + indelAlleleString; @@ -199,6 +218,7 @@ public class IndelUtils { inds.add(k); } + } return inds;