Added unit tests for the SnpEff codec, and made minor adjustments to the codec itself.
This commit is contained in:
parent
dd974040af
commit
5e288136e0
|
|
@ -80,6 +80,7 @@ public class SnpEffCodec implements FeatureCodec {
|
|||
}
|
||||
|
||||
try {
|
||||
trimAllFields(tokens);
|
||||
checkForRequiredFields(tokens, line);
|
||||
|
||||
String contig = tokens[0];
|
||||
|
|
@ -126,6 +127,12 @@ public class SnpEffCodec implements FeatureCodec {
|
|||
}
|
||||
}
|
||||
|
||||
private void trimAllFields ( String[] tokens ) {
|
||||
for ( int i = 0; i < tokens.length; i++ ) {
|
||||
tokens[i] = tokens[i].trim();
|
||||
}
|
||||
}
|
||||
|
||||
private void checkForRequiredFields ( String[] tokens, String line ) {
|
||||
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
|
||||
if ( tokens[requiredFieldIndex].isEmpty() ) {
|
||||
|
|
@ -155,7 +162,7 @@ public class SnpEffCodec implements FeatureCodec {
|
|||
|
||||
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
|
||||
return effectFieldTokens[effectFieldTokens.length - 1];
|
||||
return effectFieldTokens[effectFieldTokens.length - 1].trim();
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -314,4 +314,67 @@ public class SnpEffFeature implements Feature {
|
|||
public String getCustomIntervalID() {
|
||||
return customIntervalID;
|
||||
}
|
||||
|
||||
public boolean equals ( Object o ) {
|
||||
if ( o == null || ! (o instanceof SnpEffFeature) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SnpEffFeature other = (SnpEffFeature)o;
|
||||
|
||||
return contig.equals(other.contig) &&
|
||||
position == other.position &&
|
||||
(reference == null ? other.reference == null : reference.equals(other.reference)) &&
|
||||
(change == null ? other.change == null : change.equals(other.change)) &&
|
||||
changeType == other.changeType &&
|
||||
zygosity == other.zygosity &&
|
||||
(quality == null ? other.quality == null : quality.equals(other.quality)) &&
|
||||
(coverage == null ? other.coverage == null : coverage.equals(other.coverage)) &&
|
||||
(warnings == null ? other.warnings == null : warnings.equals(other.warnings)) &&
|
||||
(geneID == null ? other.geneID == null : geneID.equals(other.geneID)) &&
|
||||
(geneName == null ? other.geneName == null : geneName.equals(other.geneName)) &&
|
||||
(bioType == null ? other.bioType == null : bioType.equals(other.bioType)) &&
|
||||
(transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) &&
|
||||
(exonID == null ? other.exonID == null : exonID.equals(other.exonID)) &&
|
||||
(exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) &&
|
||||
isNonCodingGene == other.isNonCodingGene &&
|
||||
effect == other.effect &&
|
||||
(effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) &&
|
||||
(oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) &&
|
||||
(oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) &&
|
||||
(codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) &&
|
||||
(cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) &&
|
||||
(codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) &&
|
||||
(aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) &&
|
||||
(customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID));
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "[Contig: " + contig +
|
||||
" Position: " + position +
|
||||
" Reference: " + reference +
|
||||
" Change: " + change +
|
||||
" Change Type: " + changeType +
|
||||
" Zygosity: " + zygosity +
|
||||
" Quality: " + quality +
|
||||
" Coverage: " + coverage +
|
||||
" Warnings: " + warnings +
|
||||
" Gene ID: " + geneID +
|
||||
" Gene Name: " + geneName +
|
||||
" Bio Type: " + bioType +
|
||||
" Transcript ID: " + transcriptID +
|
||||
" Exon ID: " + exonID +
|
||||
" Exon Rank: " + exonRank +
|
||||
" Non-Coding Gene: " + isNonCodingGene +
|
||||
" Effect: " + effect +
|
||||
" Effect Extra Information: " + effectExtraInformation +
|
||||
" Old/New AA: " + oldAndNewAA +
|
||||
" Old/New Codon: " + oldAndNewCodon +
|
||||
" Codon Num: " + codonNum +
|
||||
" CDS Size: " + cdsSize +
|
||||
" Codons Around: " + codonsAround +
|
||||
" AAs Around: " + aasAround +
|
||||
" Custom Interval ID: " + customIntervalID +
|
||||
"]";
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,259 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
import org.apache.commons.io.input.ReaderInputStream;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
public class SnpEffCodecUnitTest {
|
||||
|
||||
@Test
|
||||
public void testParseWellFormedSnpEffHeaderLine() {
|
||||
String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around\tCustom_interval_ID";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine)));
|
||||
String headerReturned = (String)codec.readHeader(reader);
|
||||
|
||||
Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||
public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
|
||||
String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine)));
|
||||
codec.readHeader(reader);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||
public void testParseMisnamedColumnSnpEffHeaderLine() {
|
||||
String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around\tCustom_interval_ID";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine)));
|
||||
codec.readHeader(reader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseSimpleEffectSnpEffLine() {
|
||||
String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
69428l,
|
||||
"T",
|
||||
"G",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
6049.69,
|
||||
61573l,
|
||||
null,
|
||||
"ENSG00000177693",
|
||||
"OR4F5",
|
||||
"mRNA",
|
||||
"ENST00000326183",
|
||||
"exon_1_69055_70108",
|
||||
1,
|
||||
false,
|
||||
EffectType.NON_SYNONYMOUS_CODING,
|
||||
null,
|
||||
"F/C",
|
||||
"TTT/TGT",
|
||||
113,
|
||||
918,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseNonCodingRegionSnpEffLine() {
|
||||
String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
|
||||
"RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
|
||||
"L/V\tCTA/GTA\t272\t952\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
1337592l,
|
||||
"G",
|
||||
"C",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
1935.52,
|
||||
21885l,
|
||||
null,
|
||||
"ENSG00000250188",
|
||||
"RP4-758J18.5",
|
||||
"mRNA",
|
||||
"ENST00000514958",
|
||||
"exon_1_1337454_1338076",
|
||||
2,
|
||||
true,
|
||||
EffectType.NON_SYNONYMOUS_CODING,
|
||||
null,
|
||||
"L/V",
|
||||
"CTA/GTA",
|
||||
272,
|
||||
952,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseExtraEffectInformationSnpEffLine() {
|
||||
String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
|
||||
"mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
879537l,
|
||||
"T",
|
||||
"C",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
341.58,
|
||||
13733l,
|
||||
null,
|
||||
"ENSG00000187634",
|
||||
"SAMD11",
|
||||
"mRNA",
|
||||
"ENST00000341065",
|
||||
null,
|
||||
null,
|
||||
false,
|
||||
EffectType.UTR_3_PRIME,
|
||||
"4 bases from transcript end",
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseMultiEffectSnpEffLine() {
|
||||
String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
|
||||
"ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
901901l,
|
||||
"C",
|
||||
"T",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
162.91,
|
||||
4646l,
|
||||
null,
|
||||
"ENSG00000187583",
|
||||
"PLEKHN1",
|
||||
"mRNA",
|
||||
"ENST00000379410",
|
||||
"exon_1_901877_901994",
|
||||
1,
|
||||
false,
|
||||
EffectType.START_GAINED,
|
||||
"ATG, UTR_5_PRIME: 11 bases from TSS",
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseWrongNumberOfFieldsSnpEffLine() {
|
||||
String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseBlankEffectFieldSnpEffLine() {
|
||||
String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseInvalidNumericFieldSnpEffLine() {
|
||||
String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";;
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue