Added unit tests for the SnpEff codec, and made minor adjustments to the codec itself.

This commit is contained in:
David Roazen 2011-08-08 16:51:43 -04:00
parent dd974040af
commit 5e288136e0
3 changed files with 330 additions and 1 deletions

View File

@ -80,6 +80,7 @@ public class SnpEffCodec implements FeatureCodec {
}
try {
trimAllFields(tokens);
checkForRequiredFields(tokens, line);
String contig = tokens[0];
@ -126,6 +127,12 @@ public class SnpEffCodec implements FeatureCodec {
}
}
/**
 * Trims leading and trailing whitespace from every token, overwriting the array in place.
 *
 * @param tokens the fields to normalize; each element is replaced by its trimmed form
 */
private void trimAllFields ( String[] tokens ) {
    for ( int fieldIndex = 0, fieldCount = tokens.length; fieldIndex < fieldCount; fieldIndex++ ) {
        final String rawField = tokens[fieldIndex];
        tokens[fieldIndex] = rawField.trim();
    }
}
private void checkForRequiredFields ( String[] tokens, String line ) {
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
if ( tokens[requiredFieldIndex].isEmpty() ) {
@ -155,7 +162,7 @@ public class SnpEffCodec implements FeatureCodec {
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
return effectFieldTokens[effectFieldTokens.length - 1];
return effectFieldTokens[effectFieldTokens.length - 1].trim();
}
return null;

View File

@ -314,4 +314,67 @@ public class SnpEffFeature implements Feature {
/**
 * Returns the custom interval ID recorded for this feature.
 *
 * @return the current value of the customIntervalID field; equals() guards this field
 *         against null, so callers should be prepared for a null result
 */
public String getCustomIntervalID() {
    return this.customIntervalID;
}
/**
 * Compares this feature with another object field by field.
 *
 * Two features are equal only when every field matches. Reference-typed fields are
 * compared null-safely (two nulls are considered equal); the remaining fields are
 * compared with ==.
 *
 * @param o the object to compare against; may be null
 * @return true if o is a SnpEffFeature whose fields all match this one's, false otherwise
 */
@Override
public boolean equals ( Object o ) {
    if ( this == o ) {
        return true;
    }

    // instanceof evaluates to false for null, so no separate null check is needed.
    if ( ! (o instanceof SnpEffFeature) ) {
        return false;
    }

    SnpEffFeature other = (SnpEffFeature)o;

    // NOTE(review): position, changeType, zygosity, isNonCodingGene and effect are compared
    // with ==, as before; this presumes they are primitives or enums -- confirm against the
    // field declarations.
    return nullSafeEquals(contig, other.contig) &&
           position == other.position &&
           nullSafeEquals(reference, other.reference) &&
           nullSafeEquals(change, other.change) &&
           changeType == other.changeType &&
           zygosity == other.zygosity &&
           nullSafeEquals(quality, other.quality) &&
           nullSafeEquals(coverage, other.coverage) &&
           nullSafeEquals(warnings, other.warnings) &&
           nullSafeEquals(geneID, other.geneID) &&
           nullSafeEquals(geneName, other.geneName) &&
           nullSafeEquals(bioType, other.bioType) &&
           nullSafeEquals(transcriptID, other.transcriptID) &&
           nullSafeEquals(exonID, other.exonID) &&
           nullSafeEquals(exonRank, other.exonRank) &&
           isNonCodingGene == other.isNonCodingGene &&
           effect == other.effect &&
           nullSafeEquals(effectExtraInformation, other.effectExtraInformation) &&
           nullSafeEquals(oldAndNewAA, other.oldAndNewAA) &&
           nullSafeEquals(oldAndNewCodon, other.oldAndNewCodon) &&
           nullSafeEquals(codonNum, other.codonNum) &&
           nullSafeEquals(cdsSize, other.cdsSize) &&
           nullSafeEquals(codonsAround, other.codonsAround) &&
           nullSafeEquals(aasAround, other.aasAround) &&
           nullSafeEquals(customIntervalID, other.customIntervalID);
}

/**
 * Returns a hash code consistent with {@link #equals(Object)}.
 *
 * Only a subset of the fields compared by equals() is hashed. Equal features agree on
 * every one of these fields, so they always produce the same hash code.
 *
 * @return the hash code for this feature
 */
@Override
public int hashCode() {
    // Arrays.hashCode tolerates null elements and boxes primitive values.
    return java.util.Arrays.hashCode(new Object[] {
            contig, position, reference, change, geneID, transcriptID, effect
    });
}

/**
 * Null-tolerant equality: two nulls are equal, a null and a non-null are not.
 *
 * @param first  the first value; may be null
 * @param second the second value; may be null
 * @return true if both are null, or first.equals(second) holds
 */
private static boolean nullSafeEquals ( Object first, Object second ) {
    return first == null ? second == null : first.equals(second);
}
/**
 * Renders every field of this feature as a single bracketed line of
 * "Label: value" pairs separated by spaces. Null fields are printed as "null".
 *
 * @return a human-readable description of this feature
 */
@Override
public String toString() {
    StringBuilder description = new StringBuilder();

    description.append("[Contig: ").append(contig)
               .append(" Position: ").append(position)
               .append(" Reference: ").append(reference)
               .append(" Change: ").append(change)
               .append(" Change Type: ").append(changeType)
               .append(" Zygosity: ").append(zygosity)
               .append(" Quality: ").append(quality)
               .append(" Coverage: ").append(coverage)
               .append(" Warnings: ").append(warnings)
               .append(" Gene ID: ").append(geneID)
               .append(" Gene Name: ").append(geneName)
               .append(" Bio Type: ").append(bioType)
               .append(" Transcript ID: ").append(transcriptID)
               .append(" Exon ID: ").append(exonID)
               .append(" Exon Rank: ").append(exonRank)
               .append(" Non-Coding Gene: ").append(isNonCodingGene)
               .append(" Effect: ").append(effect)
               .append(" Effect Extra Information: ").append(effectExtraInformation)
               .append(" Old/New AA: ").append(oldAndNewAA)
               .append(" Old/New Codon: ").append(oldAndNewCodon)
               .append(" Codon Num: ").append(codonNum)
               .append(" CDS Size: ").append(cdsSize)
               .append(" Codons Around: ").append(codonsAround)
               .append(" AAs Around: ").append(aasAround)
               .append(" Custom Interval ID: ").append(customIntervalID)
               .append("]");

    return description.toString();
}
}

View File

@ -0,0 +1,259 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
import org.apache.commons.io.input.ReaderInputStream;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.testng.Assert;
import org.testng.annotations.Test;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
import java.io.StringReader;
/**
 * Unit tests for {@link SnpEffCodec}, covering header validation via readHeader()
 * and decoding of individual SnpEff output lines via decode().
 */
public class SnpEffCodecUnitTest {

    /**
     * Feeds a header line to a fresh codec through an AsciiLineReader.
     *
     * @param headerLine the complete header text to parse
     * @return whatever the codec's readHeader() returns for that text
     */
    private static Object readHeader ( String headerLine ) {
        LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(headerLine)));
        return new SnpEffCodec().readHeader(reader);
    }

    /**
     * Decodes a single SnpEff output line with a fresh codec.
     *
     * @param line the tab-delimited line to decode
     * @return the decoded feature
     */
    private static SnpEffFeature decode ( String line ) {
        return (SnpEffFeature)new SnpEffCodec().decode(line);
    }

    /** A well-formed header line must be returned unchanged by readHeader(). */
    @Test
    public void testParseWellFormedSnpEffHeaderLine() {
        String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
                "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
                "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
                "AAs around\tCustom_interval_ID";

        String headerReturned = (String)readHeader(wellFormedSnpEffHeaderLine);

        Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
    }

    /** A header missing its final column (Custom_interval_ID) must be rejected. */
    @Test(expectedExceptions = TribbleException.InvalidHeader.class)
    public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
        String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
                "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
                "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
                "AAs around";

        readHeader(wrongNumberOfFieldsSnpEffHeaderLine);
    }

    /** A header whose third column is named "Ref" instead of "Reference" must be rejected. */
    @Test(expectedExceptions = TribbleException.InvalidHeader.class)
    public void testParseMisnamedColumnSnpEffHeaderLine() {
        String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
                "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
                "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
                "AAs around\tCustom_interval_ID";

        readHeader(misnamedColumnSnpEffHeaderLine);
    }

    /** A line with a single effect and no extra effect information decodes to the expected feature. */
    @Test
    public void testParseSimpleEffectSnpEffLine() {
        String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
                "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";

        SnpEffFeature expectedFeature = new SnpEffFeature("1",
                                                          69428L,
                                                          "T",
                                                          "G",
                                                          ChangeType.SNP,
                                                          Zygosity.Hom,
                                                          6049.69,
                                                          61573L,
                                                          null,
                                                          "ENSG00000177693",
                                                          "OR4F5",
                                                          "mRNA",
                                                          "ENST00000326183",
                                                          "exon_1_69055_70108",
                                                          1,
                                                          false,
                                                          EffectType.NON_SYNONYMOUS_CODING,
                                                          null,
                                                          "F/C",
                                                          "TTT/TGT",
                                                          113,
                                                          918,
                                                          null,
                                                          null,
                                                          null
                                                         );

        Assert.assertEquals(decode(simpleEffectSnpEffLine), expectedFeature);
    }

    /**
     * An effect field prefixed with WITHIN_NON_CODING_GENE decodes to a feature flagged as
     * non-coding whose effect is the one named after the prefix.
     */
    @Test
    public void testParseNonCodingRegionSnpEffLine() {
        String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
                "RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
                "L/V\tCTA/GTA\t272\t952\t\t\t";

        SnpEffFeature expectedFeature = new SnpEffFeature("1",
                                                          1337592L,
                                                          "G",
                                                          "C",
                                                          ChangeType.SNP,
                                                          Zygosity.Hom,
                                                          1935.52,
                                                          21885L,
                                                          null,
                                                          "ENSG00000250188",
                                                          "RP4-758J18.5",
                                                          "mRNA",
                                                          "ENST00000514958",
                                                          "exon_1_1337454_1338076",
                                                          2,
                                                          true,
                                                          EffectType.NON_SYNONYMOUS_CODING,
                                                          null,
                                                          "L/V",
                                                          "CTA/GTA",
                                                          272,
                                                          952,
                                                          null,
                                                          null,
                                                          null
                                                         );

        Assert.assertEquals(decode(nonCodingRegionSnpEffLine), expectedFeature);
    }

    /** Text following "EFFECT: " in the effect field is captured as the extra effect information. */
    @Test
    public void testParseExtraEffectInformationSnpEffLine() {
        String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
                "mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";

        SnpEffFeature expectedFeature = new SnpEffFeature("1",
                                                          879537L,
                                                          "T",
                                                          "C",
                                                          ChangeType.SNP,
                                                          Zygosity.Hom,
                                                          341.58,
                                                          13733L,
                                                          null,
                                                          "ENSG00000187634",
                                                          "SAMD11",
                                                          "mRNA",
                                                          "ENST00000341065",
                                                          null,
                                                          null,
                                                          false,
                                                          EffectType.UTR_3_PRIME,
                                                          "4 bases from transcript end",
                                                          null,
                                                          null,
                                                          null,
                                                          null,
                                                          null,
                                                          null,
                                                          null
                                                         );

        Assert.assertEquals(decode(extraEffectInformationSnpEffLine), expectedFeature);
    }

    /**
     * When the effect field lists several effects, the first is the feature's effect and the
     * rest of the field is kept verbatim as the extra effect information.
     */
    @Test
    public void testParseMultiEffectSnpEffLine() {
        String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
                "ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";

        SnpEffFeature expectedFeature = new SnpEffFeature("1",
                                                          901901L,
                                                          "C",
                                                          "T",
                                                          ChangeType.SNP,
                                                          Zygosity.Hom,
                                                          162.91,
                                                          4646L,
                                                          null,
                                                          "ENSG00000187583",
                                                          "PLEKHN1",
                                                          "mRNA",
                                                          "ENST00000379410",
                                                          "exon_1_901877_901994",
                                                          1,
                                                          false,
                                                          EffectType.START_GAINED,
                                                          "ATG, UTR_5_PRIME: 11 bases from TSS",
                                                          null,
                                                          null,
                                                          null,
                                                          null,
                                                          null,
                                                          null,
                                                          null
                                                         );

        Assert.assertEquals(decode(multiEffectSnpEffLine), expectedFeature);
    }

    /** A data line with one trailing field fewer than the well-formed lines must be rejected. */
    @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
    public void testParseWrongNumberOfFieldsSnpEffLine() {
        String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
                "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";

        decode(wrongNumberOfFieldsSnpEffLine);
    }

    /** A data line whose effect field is empty must be rejected. */
    @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
    public void testParseBlankEffectFieldSnpEffLine() {
        String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
                "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";

        decode(blankEffectFieldSnpEffLine);
    }

    /** A data line with non-numeric text ("foo") in a numeric field must be rejected. */
    @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
    public void testParseInvalidNumericFieldSnpEffLine() {
        String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
                "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";

        decode(invalidNumericFieldSnpEffLine);
    }
}