Very basic functionality for annotating indels (specifies whether the indel is frameshift, inframe, or non-coding). Does not attempt to recalculate the variant codon, variant amino acid, or whether the site falls within a splice region. Added a convenience method to WalkerTest for building command-line arguments with the proper spacing (so that I stop getting annoyed when I've gotten it wrong and the test system yells at me).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5235 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8d6db5d188
commit
d3660aa00e
|
|
@ -63,6 +63,53 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
|
|||
/**
 * Replacement for each character in ILLEGAL_INFO_FIELD_VALUES.
 * NOTE(review): presumably index-aligned with ILLEGAL_INFO_FIELD_VALUES, which is declared
 * outside this excerpt - confirm before changing the order or length of either array.
 */
|
||||
public static final char[] ILLEGAL_INFO_FIELD_VALUE_SUBSTITUTES = { '_', '-', '!' };
|
||||
|
||||
|
||||
private void modifyAnnotationsForIndels(VariantContext vc, String featureName, Map<String, String> annotationsForRecord) {
|
||||
String inCodingRegionKey = featureName + ".inCodingRegion";
|
||||
String referenceCodonKey = featureName + ".referenceCodon";
|
||||
String variantCodonKey = featureName + ".variantCodon";
|
||||
String codingCoordStrKey = featureName + ".codingCoordStr";
|
||||
String proteinCoordStrKey = featureName + ".proteinCoordStr";
|
||||
String haplotypeReferenceKey = featureName + "." + HAPLOTYPE_REFERENCE_COLUMN;
|
||||
String haplotypeAlternateKey = featureName + "." + HAPLOTYPE_ALTERNATE_COLUMN;
|
||||
String functionalClassKey = featureName + ".functionalClass";
|
||||
String startKey = featureName + "." + START_COLUMN;
|
||||
String endKey = featureName + "." + END_COLUMN;
|
||||
String referenceAAKey = featureName + ".referenceAA";
|
||||
String variantAAKey = featureName + ".variantAA";
|
||||
String changesAAKey = featureName + ".changesAA";
|
||||
|
||||
annotationsForRecord.put(variantCodonKey, "unknown");
|
||||
annotationsForRecord.put(codingCoordStrKey, "unknown");
|
||||
annotationsForRecord.put(proteinCoordStrKey, "unknown");
|
||||
annotationsForRecord.put(referenceAAKey, "unknown");
|
||||
annotationsForRecord.put(variantAAKey, "unknown");
|
||||
|
||||
String refAllele = vc.getReference().getDisplayString();
|
||||
if (refAllele.length() == 0) { refAllele = "-"; }
|
||||
|
||||
String altAllele = vc.getAlternateAllele(0).toString();
|
||||
if (altAllele.length() == 0) { altAllele = "-"; }
|
||||
|
||||
annotationsForRecord.put(haplotypeReferenceKey, refAllele);
|
||||
annotationsForRecord.put(haplotypeAlternateKey, altAllele);
|
||||
annotationsForRecord.put(startKey, String.format("%d", vc.getStart()));
|
||||
annotationsForRecord.put(endKey, String.format("%d", vc.getEnd()));
|
||||
|
||||
boolean isCodingRegion = annotationsForRecord.containsKey(inCodingRegionKey) && annotationsForRecord.get(inCodingRegionKey).equalsIgnoreCase("true") ? true : false;
|
||||
boolean isFrameshift = (vc.getIndelLengths().get(0) % 3 == 0) ? false : true;
|
||||
|
||||
String functionalClass;
|
||||
if (isCodingRegion) {
|
||||
functionalClass = isFrameshift ? "frameshift" : "inframe";
|
||||
annotationsForRecord.put(changesAAKey, "true");
|
||||
} else {
|
||||
functionalClass = "noncoding";
|
||||
}
|
||||
|
||||
annotationsForRecord.put(functionalClassKey, functionalClass);
|
||||
}
|
||||
|
||||
/**
|
||||
* For each -B input file, for each record which overlaps the current locus, generates a
|
||||
* set of annotations of the form:
|
||||
|
|
@ -170,6 +217,10 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
|
|||
}
|
||||
}
|
||||
|
||||
if (vc.isIndel()) {
|
||||
modifyAnnotationsForIndels(vc, name, annotationsForRecord);
|
||||
}
|
||||
|
||||
//filters passed, so add this record.
|
||||
List<Map<String, String>> listOfMatchingRecords = (List<Map<String, String>>) annotations.get( name );
|
||||
if(listOfMatchingRecords == null) {
|
||||
|
|
|
|||
|
|
@ -71,6 +71,20 @@ public class WalkerTest extends BaseTest {
|
|||
return md5s;
|
||||
}
|
||||
|
||||
public String cmdLineBuilder(String ... arguments) {
|
||||
String cmdline = "";
|
||||
|
||||
for ( int argIndex = 0; argIndex < arguments.length; argIndex++ ) {
|
||||
cmdline += arguments[argIndex];
|
||||
|
||||
if (argIndex < arguments.length - 1) {
|
||||
cmdline += " ";
|
||||
}
|
||||
}
|
||||
|
||||
return cmdline;
|
||||
}
|
||||
|
||||
public class WalkerTestSpec {
|
||||
String args = "";
|
||||
int nOutputFiles = -1;
|
||||
|
|
|
|||
|
|
@ -6,8 +6,10 @@ import java.util.Arrays;
|
|||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class
|
||||
GenomicAnnotatorIntegrationTest extends WalkerTest {
|
||||
public class GenomicAnnotatorIntegrationTest extends WalkerTest {
|
||||
String testFileWithIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.cleaned.indels.vcf";
|
||||
String testFileWithSNPsAndIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.variants.vcf";
|
||||
|
||||
@Test
|
||||
public void testGenomicAnnotatorOnDbSNP() {
|
||||
|
||||
|
|
@ -42,4 +44,38 @@ public class
|
|||
executeTest("test with dbSNP and -s arg", specWithSArg);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenomicAnnotatorOnIndels() {
|
||||
WalkerTestSpec testOnIndels = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T GenomicAnnotator",
|
||||
"-R " + b37KGReference,
|
||||
"-L 22:10000000-20000000",
|
||||
"-B:refseq,AnnotatorInputTable " + b37Refseq,
|
||||
"-B:variant,VCF " + testFileWithIndels,
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("1f8189433e87cc0b986cddb6a9a74585")
|
||||
);
|
||||
executeTest("testGenomicAnnotatorOnIndels", testOnIndels);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenomicAnnotatorOnSNPsAndIndels() {
|
||||
WalkerTestSpec testOnSNPsAndIndels = new WalkerTestSpec(
|
||||
cmdLineBuilder(
|
||||
"-T GenomicAnnotator",
|
||||
"-R " + b37KGReference,
|
||||
"-L 22:10000000-20000000",
|
||||
"-B:refseq,AnnotatorInputTable " + b37Refseq,
|
||||
"-B:variant,VCF " + testFileWithSNPsAndIndels,
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("e9e93fb1d2700e000bd0e9493524dc4c")
|
||||
);
|
||||
executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
|
||||
private static String[] testsEnumerations = {root, rootGZ};
|
||||
|
||||
/*
|
||||
private String cmdLineBuilder(String ... arguments) {
|
||||
String cmdline = "";
|
||||
|
||||
|
|
@ -40,6 +41,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
|
||||
return cmdline;
|
||||
}
|
||||
*/
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndels() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue