Very basic functionality for annotating indels (specifies whether the indel is frameshift, inframe, or non-coding). Does not attempt to recalculate the variant codon, variant amino acid, or whether the site falls within a splice region. Added a convenience method to WalkerTest for building command-line arguments with the proper spacing (so that I stop getting annoyed when I've gotten it wrong and the test system yells at me).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5235 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2011-02-13 17:58:20 +00:00
parent 8d6db5d188
commit d3660aa00e
4 changed files with 105 additions and 2 deletions

View File

@ -63,6 +63,53 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
/** Replacement for each character in ILLEGAL_INFO_FIELD_VALUES */
public static final char[] ILLEGAL_INFO_FIELD_VALUE_SUBSTITUTES = { '_', '-', '!' };
/**
 * Rewrites the annotations of one matching record so that they describe an indel.
 *
 * The codon-, coordinate- and amino-acid-level fields are not recalculated; they are
 * overwritten with the literal "unknown". The haplotype, start and end fields are replaced
 * with values taken from the variant itself, and the functional class is set to
 * "frameshift", "inframe" or "noncoding". Only the first alternate allele and the first
 * indel length of the variant are considered.
 *
 * @param vc                   the variant being annotated
 * @param featureName          prefix (followed by '.') of every annotation key that is touched
 * @param annotationsForRecord annotation map for the record; modified in place
 */
private void modifyAnnotationsForIndels(VariantContext vc, String featureName, Map<String, String> annotationsForRecord) {
    String inCodingRegionKey = featureName + ".inCodingRegion";
    String variantCodonKey = featureName + ".variantCodon";
    String codingCoordStrKey = featureName + ".codingCoordStr";
    String proteinCoordStrKey = featureName + ".proteinCoordStr";
    String haplotypeReferenceKey = featureName + "." + HAPLOTYPE_REFERENCE_COLUMN;
    String haplotypeAlternateKey = featureName + "." + HAPLOTYPE_ALTERNATE_COLUMN;
    String functionalClassKey = featureName + ".functionalClass";
    String startKey = featureName + "." + START_COLUMN;
    String endKey = featureName + "." + END_COLUMN;
    String referenceAAKey = featureName + ".referenceAA";
    String variantAAKey = featureName + ".variantAA";
    String changesAAKey = featureName + ".changesAA";

    // These values are not recomputed for indels, so mark them explicitly as unknown.
    annotationsForRecord.put(variantCodonKey, "unknown");
    annotationsForRecord.put(codingCoordStrKey, "unknown");
    annotationsForRecord.put(proteinCoordStrKey, "unknown");
    annotationsForRecord.put(referenceAAKey, "unknown");
    annotationsForRecord.put(variantAAKey, "unknown");

    // An empty allele string is written as "-".
    String refAllele = vc.getReference().getDisplayString();
    if (refAllele.length() == 0) {
        refAllele = "-";
    }

    String altAllele = vc.getAlternateAllele(0).toString();
    if (altAllele.length() == 0) {
        altAllele = "-";
    }

    annotationsForRecord.put(haplotypeReferenceKey, refAllele);
    annotationsForRecord.put(haplotypeAlternateKey, altAllele);

    // String.valueOf is locale-independent, unlike String.format("%d", ...), which
    // renders digits according to the JVM's default locale.
    annotationsForRecord.put(startKey, String.valueOf(vc.getStart()));
    annotationsForRecord.put(endKey, String.valueOf(vc.getEnd()));

    // Constant-first comparison: a missing key or a null value simply means "not coding".
    boolean isCodingRegion = "true".equalsIgnoreCase(annotationsForRecord.get(inCodingRegionKey));

    // A length that is not a multiple of three shifts the reading frame.
    boolean isFrameshift = vc.getIndelLengths().get(0) % 3 != 0;

    String functionalClass;
    if (isCodingRegion) {
        functionalClass = isFrameshift ? "frameshift" : "inframe";
        annotationsForRecord.put(changesAAKey, "true");
    } else {
        functionalClass = "noncoding";
    }

    annotationsForRecord.put(functionalClassKey, functionalClass);
}
/**
* For each -B input file, for each record which overlaps the current locus, generates a
* set of annotations of the form:
@ -170,6 +217,10 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
}
}
if (vc.isIndel()) {
modifyAnnotationsForIndels(vc, name, annotationsForRecord);
}
//filters passed, so add this record.
List<Map<String, String>> listOfMatchingRecords = (List<Map<String, String>>) annotations.get( name );
if(listOfMatchingRecords == null) {

View File

@ -71,6 +71,20 @@ public class WalkerTest extends BaseTest {
return md5s;
}
/**
 * Joins the given command-line fragments into a single string, placing exactly one
 * space between consecutive fragments and none at either end.
 *
 * @param arguments the fragments to join, in order; may be empty
 * @return the fragments separated by single spaces, or the empty string when none are given
 */
public String cmdLineBuilder(String ... arguments) {
    // StringBuilder avoids re-copying the accumulated text on every iteration.
    StringBuilder cmdline = new StringBuilder();

    for ( int argIndex = 0; argIndex < arguments.length; argIndex++ ) {
        if (argIndex > 0) {
            cmdline.append(' ');
        }
        cmdline.append(arguments[argIndex]);
    }

    return cmdline.toString();
}
public class WalkerTestSpec {
String args = "";
int nOutputFiles = -1;

View File

@ -6,8 +6,10 @@ import java.util.Arrays;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
public class
GenomicAnnotatorIntegrationTest extends WalkerTest {
public class GenomicAnnotatorIntegrationTest extends WalkerTest {
String testFileWithIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.cleaned.indels.vcf";
String testFileWithSNPsAndIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.variants.vcf";
@Test
public void testGenomicAnnotatorOnDbSNP() {
@ -42,4 +44,38 @@ public class
executeTest("test with dbSNP and -s arg", specWithSArg);
}
/**
 * Runs GenomicAnnotator over 22:10000000-20000000 with the refseq annotation table and
 * the indel-only VCF, and hands the resulting spec to executeTest.
 */
@Test
public void testGenomicAnnotatorOnIndels() {
    final String commandLine = cmdLineBuilder(
            "-T GenomicAnnotator",
            "-R " + b37KGReference,
            "-L 22:10000000-20000000",
            "-B:refseq,AnnotatorInputTable " + b37Refseq,
            "-B:variant,VCF " + testFileWithIndels,
            "-o %s"
    );

    final WalkerTestSpec spec = new WalkerTestSpec(
            commandLine,
            1,
            Arrays.asList("1f8189433e87cc0b986cddb6a9a74585")
    );

    executeTest("testGenomicAnnotatorOnIndels", spec);
}
/**
 * Runs GenomicAnnotator over 22:10000000-20000000 with the refseq annotation table and
 * the VCF containing both SNPs and indels, and hands the resulting spec to executeTest.
 */
@Test
public void testGenomicAnnotatorOnSNPsAndIndels() {
    final String commandLine = cmdLineBuilder(
            "-T GenomicAnnotator",
            "-R " + b37KGReference,
            "-L 22:10000000-20000000",
            "-B:refseq,AnnotatorInputTable " + b37Refseq,
            "-B:variant,VCF " + testFileWithSNPsAndIndels,
            "-o %s"
    );

    final WalkerTestSpec spec = new WalkerTestSpec(
            commandLine,
            1,
            Arrays.asList("e9e93fb1d2700e000bd0e9493524dc4c")
    );

    executeTest("testGenomicAnnotatorOnSNPsAndIndels", spec);
}
}

View File

@ -27,6 +27,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String[] testsEnumerations = {root, rootGZ};
/*
private String cmdLineBuilder(String ... arguments) {
String cmdline = "";
@ -40,6 +41,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
return cmdline;
}
*/
@Test
public void testFundamentalsCountVariantsSNPsAndIndels() {