diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java
index 350020111..0b2187dcd 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java
@@ -63,6 +63,69 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
     /** Replacement for each character in ILLEGAL_INFO_FIELD_VALUES */
     public static final char[] ILLEGAL_INFO_FIELD_VALUE_SUBSTITUTES = { '_', '-', '!' };
 
+
+    /**
+     * Rewrites the annotations of one matching record so that they describe an indel.
+     *
+     * Codon and amino-acid fields are set to "unknown"; the haplotype, start and end fields are
+     * overwritten with the alleles and coordinates reported by the variant itself; and
+     * functionalClass is set to "frameshift", "inframe" or "noncoding". Only the first alternate
+     * allele and the first indel length of the variant are examined.
+     *
+     * @param vc the indel being annotated
+     * @param featureName prefix shared by every annotation key that is read or written here
+     * @param annotationsForRecord the record's annotation map; modified in place
+     */
+    private void modifyAnnotationsForIndels(VariantContext vc, String featureName, Map<String, String> annotationsForRecord) {
+        String inCodingRegionKey = featureName + ".inCodingRegion";
+        String variantCodonKey = featureName + ".variantCodon";
+        String codingCoordStrKey = featureName + ".codingCoordStr";
+        String proteinCoordStrKey = featureName + ".proteinCoordStr";
+        String haplotypeReferenceKey = featureName + "." + HAPLOTYPE_REFERENCE_COLUMN;
+        String haplotypeAlternateKey = featureName + "." + HAPLOTYPE_ALTERNATE_COLUMN;
+        String functionalClassKey = featureName + ".functionalClass";
+        String startKey = featureName + "." + START_COLUMN;
+        String endKey = featureName + "." + END_COLUMN;
+        String referenceAAKey = featureName + ".referenceAA";
+        String variantAAKey = featureName + ".variantAA";
+        String changesAAKey = featureName + ".changesAA";
+
+        // No codon or amino-acid level values are computed for an indel here, so mark them all as unknown.
+        annotationsForRecord.put(variantCodonKey, "unknown");
+        annotationsForRecord.put(codingCoordStrKey, "unknown");
+        annotationsForRecord.put(proteinCoordStrKey, "unknown");
+        annotationsForRecord.put(referenceAAKey, "unknown");
+        annotationsForRecord.put(variantAAKey, "unknown");
+
+        // An empty allele string is written as "-".
+        String refAllele = vc.getReference().getDisplayString();
+        if (refAllele.length() == 0) { refAllele = "-"; }
+
+        String altAllele = vc.getAlternateAllele(0).toString();
+        if (altAllele.length() == 0) { altAllele = "-"; }
+
+        annotationsForRecord.put(haplotypeReferenceKey, refAllele);
+        annotationsForRecord.put(haplotypeAlternateKey, altAllele);
+        // String.valueOf is locale-independent, unlike String.format("%d", ...).
+        annotationsForRecord.put(startKey, String.valueOf(vc.getStart()));
+        annotationsForRecord.put(endKey, String.valueOf(vc.getEnd()));
+
+        // A missing (or null) inCodingRegion annotation counts as non-coding.
+        boolean isCodingRegion = "true".equalsIgnoreCase(annotationsForRecord.get(inCodingRegionKey));
+        // An indel whose length is not a multiple of three shifts the reading frame.
+        boolean isFrameshift = vc.getIndelLengths().get(0) % 3 != 0;
+
+        String functionalClass;
+        if (isCodingRegion) {
+            functionalClass = isFrameshift ? "frameshift" : "inframe";
+            annotationsForRecord.put(changesAAKey, "true");
+        } else {
+            functionalClass = "noncoding";
+        }
+
+        annotationsForRecord.put(functionalClassKey, functionalClass);
+    }
+
     /**
      * For each -B input file, for each record which overlaps the current locus, generates a
      * set of annotations of the form:
@@ -170,6 +233,10 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
                     }
                 }
 
+                if (vc.isIndel()) {
+                    modifyAnnotationsForIndels(vc, name, annotationsForRecord);
+                }
+
                 //filters passed, so add this record.
List> listOfMatchingRecords = (List>) annotations.get( name ); if(listOfMatchingRecords == null) { diff --git a/java/test/org/broadinstitute/sting/WalkerTest.java b/java/test/org/broadinstitute/sting/WalkerTest.java index 84ce40029..5d4346427 100755 --- a/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/java/test/org/broadinstitute/sting/WalkerTest.java @@ -71,6 +71,20 @@ public class WalkerTest extends BaseTest { return md5s; } + public String cmdLineBuilder(String ... arguments) { + String cmdline = ""; + + for ( int argIndex = 0; argIndex < arguments.length; argIndex++ ) { + cmdline += arguments[argIndex]; + + if (argIndex < arguments.length - 1) { + cmdline += " "; + } + } + + return cmdline; + } + public class WalkerTestSpec { String args = ""; int nOutputFiles = -1; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java index 369624c6b..223307dc4 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java @@ -6,8 +6,10 @@ import java.util.Arrays; import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; -public class - GenomicAnnotatorIntegrationTest extends WalkerTest { +public class GenomicAnnotatorIntegrationTest extends WalkerTest { + String testFileWithIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.cleaned.indels.vcf"; + String testFileWithSNPsAndIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.variants.vcf"; + @Test public void testGenomicAnnotatorOnDbSNP() { @@ -42,4 +44,38 @@ public class executeTest("test with dbSNP and -s arg", specWithSArg); } + + @Test + public void testGenomicAnnotatorOnIndels() { + 
WalkerTestSpec testOnIndels = new WalkerTestSpec( + cmdLineBuilder( + "-T GenomicAnnotator", + "-R " + b37KGReference, + "-L 22:10000000-20000000", + "-B:refseq,AnnotatorInputTable " + b37Refseq, + "-B:variant,VCF " + testFileWithIndels, + "-o %s" + ), + 1, + Arrays.asList("1f8189433e87cc0b986cddb6a9a74585") + ); + executeTest("testGenomicAnnotatorOnIndels", testOnIndels); + } + + @Test + public void testGenomicAnnotatorOnSNPsAndIndels() { + WalkerTestSpec testOnSNPsAndIndels = new WalkerTestSpec( + cmdLineBuilder( + "-T GenomicAnnotator", + "-R " + b37KGReference, + "-L 22:10000000-20000000", + "-B:refseq,AnnotatorInputTable " + b37Refseq, + "-B:variant,VCF " + testFileWithSNPsAndIndels, + "-o %s" + ), + 1, + Arrays.asList("e9e93fb1d2700e000bd0e9493524dc4c") + ); + executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels); + } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 6fd8bddc6..fbbfa297a 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -27,6 +27,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String[] testsEnumerations = {root, rootGZ}; + /* private String cmdLineBuilder(String ... arguments) { String cmdline = ""; @@ -40,6 +41,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { return cmdline; } + */ @Test public void testFundamentalsCountVariantsSNPsAndIndels() {