Added unit tests for the SnpEff codec, and made minor adjustments to the codec itself.
This commit is contained in:
parent
dd974040af
commit
5e288136e0
|
|
@ -80,6 +80,7 @@ public class SnpEffCodec implements FeatureCodec {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
trimAllFields(tokens);
|
||||||
checkForRequiredFields(tokens, line);
|
checkForRequiredFields(tokens, line);
|
||||||
|
|
||||||
String contig = tokens[0];
|
String contig = tokens[0];
|
||||||
|
|
@ -126,6 +127,12 @@ public class SnpEffCodec implements FeatureCodec {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void trimAllFields ( String[] tokens ) {
|
||||||
|
for ( int i = 0; i < tokens.length; i++ ) {
|
||||||
|
tokens[i] = tokens[i].trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void checkForRequiredFields ( String[] tokens, String line ) {
|
private void checkForRequiredFields ( String[] tokens, String line ) {
|
||||||
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
|
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
|
||||||
if ( tokens[requiredFieldIndex].isEmpty() ) {
|
if ( tokens[requiredFieldIndex].isEmpty() ) {
|
||||||
|
|
@ -155,7 +162,7 @@ public class SnpEffCodec implements FeatureCodec {
|
||||||
|
|
||||||
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||||
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
|
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
|
||||||
return effectFieldTokens[effectFieldTokens.length - 1];
|
return effectFieldTokens[effectFieldTokens.length - 1].trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -314,4 +314,67 @@ public class SnpEffFeature implements Feature {
|
||||||
public String getCustomIntervalID() {
|
public String getCustomIntervalID() {
|
||||||
return customIntervalID;
|
return customIntervalID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean equals ( Object o ) {
|
||||||
|
if ( o == null || ! (o instanceof SnpEffFeature) ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
SnpEffFeature other = (SnpEffFeature)o;
|
||||||
|
|
||||||
|
return contig.equals(other.contig) &&
|
||||||
|
position == other.position &&
|
||||||
|
(reference == null ? other.reference == null : reference.equals(other.reference)) &&
|
||||||
|
(change == null ? other.change == null : change.equals(other.change)) &&
|
||||||
|
changeType == other.changeType &&
|
||||||
|
zygosity == other.zygosity &&
|
||||||
|
(quality == null ? other.quality == null : quality.equals(other.quality)) &&
|
||||||
|
(coverage == null ? other.coverage == null : coverage.equals(other.coverage)) &&
|
||||||
|
(warnings == null ? other.warnings == null : warnings.equals(other.warnings)) &&
|
||||||
|
(geneID == null ? other.geneID == null : geneID.equals(other.geneID)) &&
|
||||||
|
(geneName == null ? other.geneName == null : geneName.equals(other.geneName)) &&
|
||||||
|
(bioType == null ? other.bioType == null : bioType.equals(other.bioType)) &&
|
||||||
|
(transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) &&
|
||||||
|
(exonID == null ? other.exonID == null : exonID.equals(other.exonID)) &&
|
||||||
|
(exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) &&
|
||||||
|
isNonCodingGene == other.isNonCodingGene &&
|
||||||
|
effect == other.effect &&
|
||||||
|
(effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) &&
|
||||||
|
(oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) &&
|
||||||
|
(oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) &&
|
||||||
|
(codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) &&
|
||||||
|
(cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) &&
|
||||||
|
(codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) &&
|
||||||
|
(aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) &&
|
||||||
|
(customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "[Contig: " + contig +
|
||||||
|
" Position: " + position +
|
||||||
|
" Reference: " + reference +
|
||||||
|
" Change: " + change +
|
||||||
|
" Change Type: " + changeType +
|
||||||
|
" Zygosity: " + zygosity +
|
||||||
|
" Quality: " + quality +
|
||||||
|
" Coverage: " + coverage +
|
||||||
|
" Warnings: " + warnings +
|
||||||
|
" Gene ID: " + geneID +
|
||||||
|
" Gene Name: " + geneName +
|
||||||
|
" Bio Type: " + bioType +
|
||||||
|
" Transcript ID: " + transcriptID +
|
||||||
|
" Exon ID: " + exonID +
|
||||||
|
" Exon Rank: " + exonRank +
|
||||||
|
" Non-Coding Gene: " + isNonCodingGene +
|
||||||
|
" Effect: " + effect +
|
||||||
|
" Effect Extra Information: " + effectExtraInformation +
|
||||||
|
" Old/New AA: " + oldAndNewAA +
|
||||||
|
" Old/New Codon: " + oldAndNewCodon +
|
||||||
|
" Codon Num: " + codonNum +
|
||||||
|
" CDS Size: " + cdsSize +
|
||||||
|
" Codons Around: " + codonsAround +
|
||||||
|
" AAs Around: " + aasAround +
|
||||||
|
" Custom Interval ID: " + customIntervalID +
|
||||||
|
"]";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,259 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||||
|
|
||||||
|
import org.apache.commons.io.input.ReaderInputStream;
|
||||||
|
import org.broad.tribble.TribbleException;
|
||||||
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
|
import org.broad.tribble.readers.LineReader;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
public class SnpEffCodecUnitTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParseWellFormedSnpEffHeaderLine() {
|
||||||
|
String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||||
|
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||||
|
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||||
|
"AAs around\tCustom_interval_ID";
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine)));
|
||||||
|
String headerReturned = (String)codec.readHeader(reader);
|
||||||
|
|
||||||
|
Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||||
|
public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
|
||||||
|
String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||||
|
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||||
|
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||||
|
"AAs around";
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine)));
|
||||||
|
codec.readHeader(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||||
|
public void testParseMisnamedColumnSnpEffHeaderLine() {
|
||||||
|
String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
|
||||||
|
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||||
|
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||||
|
"AAs around\tCustom_interval_ID";
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine)));
|
||||||
|
codec.readHeader(reader);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParseSimpleEffectSnpEffLine() {
|
||||||
|
String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||||
|
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||||
|
|
||||||
|
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||||
|
69428l,
|
||||||
|
"T",
|
||||||
|
"G",
|
||||||
|
ChangeType.SNP,
|
||||||
|
Zygosity.Hom,
|
||||||
|
6049.69,
|
||||||
|
61573l,
|
||||||
|
null,
|
||||||
|
"ENSG00000177693",
|
||||||
|
"OR4F5",
|
||||||
|
"mRNA",
|
||||||
|
"ENST00000326183",
|
||||||
|
"exon_1_69055_70108",
|
||||||
|
1,
|
||||||
|
false,
|
||||||
|
EffectType.NON_SYNONYMOUS_CODING,
|
||||||
|
null,
|
||||||
|
"F/C",
|
||||||
|
"TTT/TGT",
|
||||||
|
113,
|
||||||
|
918,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
|
||||||
|
|
||||||
|
Assert.assertEquals(feature, expectedFeature);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParseNonCodingRegionSnpEffLine() {
|
||||||
|
String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
|
||||||
|
"RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
|
||||||
|
"L/V\tCTA/GTA\t272\t952\t\t\t";
|
||||||
|
|
||||||
|
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||||
|
1337592l,
|
||||||
|
"G",
|
||||||
|
"C",
|
||||||
|
ChangeType.SNP,
|
||||||
|
Zygosity.Hom,
|
||||||
|
1935.52,
|
||||||
|
21885l,
|
||||||
|
null,
|
||||||
|
"ENSG00000250188",
|
||||||
|
"RP4-758J18.5",
|
||||||
|
"mRNA",
|
||||||
|
"ENST00000514958",
|
||||||
|
"exon_1_1337454_1338076",
|
||||||
|
2,
|
||||||
|
true,
|
||||||
|
EffectType.NON_SYNONYMOUS_CODING,
|
||||||
|
null,
|
||||||
|
"L/V",
|
||||||
|
"CTA/GTA",
|
||||||
|
272,
|
||||||
|
952,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
|
||||||
|
|
||||||
|
Assert.assertEquals(feature, expectedFeature);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParseExtraEffectInformationSnpEffLine() {
|
||||||
|
String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
|
||||||
|
"mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
|
||||||
|
|
||||||
|
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||||
|
879537l,
|
||||||
|
"T",
|
||||||
|
"C",
|
||||||
|
ChangeType.SNP,
|
||||||
|
Zygosity.Hom,
|
||||||
|
341.58,
|
||||||
|
13733l,
|
||||||
|
null,
|
||||||
|
"ENSG00000187634",
|
||||||
|
"SAMD11",
|
||||||
|
"mRNA",
|
||||||
|
"ENST00000341065",
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
false,
|
||||||
|
EffectType.UTR_3_PRIME,
|
||||||
|
"4 bases from transcript end",
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
|
||||||
|
|
||||||
|
Assert.assertEquals(feature, expectedFeature);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParseMultiEffectSnpEffLine() {
|
||||||
|
String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
|
||||||
|
"ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
|
||||||
|
|
||||||
|
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||||
|
901901l,
|
||||||
|
"C",
|
||||||
|
"T",
|
||||||
|
ChangeType.SNP,
|
||||||
|
Zygosity.Hom,
|
||||||
|
162.91,
|
||||||
|
4646l,
|
||||||
|
null,
|
||||||
|
"ENSG00000187583",
|
||||||
|
"PLEKHN1",
|
||||||
|
"mRNA",
|
||||||
|
"ENST00000379410",
|
||||||
|
"exon_1_901877_901994",
|
||||||
|
1,
|
||||||
|
false,
|
||||||
|
EffectType.START_GAINED,
|
||||||
|
"ATG, UTR_5_PRIME: 11 bases from TSS",
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null
|
||||||
|
);
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
|
||||||
|
|
||||||
|
Assert.assertEquals(feature, expectedFeature);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||||
|
public void testParseWrongNumberOfFieldsSnpEffLine() {
|
||||||
|
String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||||
|
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||||
|
public void testParseBlankEffectFieldSnpEffLine() {
|
||||||
|
String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||||
|
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||||
|
public void testParseInvalidNumericFieldSnpEffLine() {
|
||||||
|
String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||||
|
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";;
|
||||||
|
|
||||||
|
SnpEffCodec codec = new SnpEffCodec();
|
||||||
|
SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue