Potential bug fix for bad references where some codons may have Ns

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4075 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-08-21 12:09:33 +00:00
parent 121b4f23b6
commit b6989289fc
3 changed files with 44 additions and 40 deletions

View File

@ -82,6 +82,11 @@ public class AminoAcid {
return "*".equals(getLetter()); return "*".equals(getLetter());
} }
/** Returns true if this amino acid's letter is "X", i.e. it is the unknown amino acid rather than a real one. */
public boolean isUnknown() {
return "X".equals(getLetter());
}
public String toString() { public String toString() {
return name; return name;
} }

View File

@ -34,6 +34,7 @@ import java.util.HashMap;
public class AminoAcidTable { public class AminoAcidTable {
protected static final AminoAcid UNKNOWN = new AminoAcid("X" , "Unknown", "Unk");
protected static final AminoAcid ISOLEUCINE = new AminoAcid("I" , "Isoleucine", "Ile"); protected static final AminoAcid ISOLEUCINE = new AminoAcid("I" , "Isoleucine", "Ile");
protected static final AminoAcid LEUCINE = new AminoAcid("L" , "Leucine", "Leu"); protected static final AminoAcid LEUCINE = new AminoAcid("L" , "Leucine", "Leu");
protected static final AminoAcid VALINE = new AminoAcid("V" , "Valine", "Val"); protected static final AminoAcid VALINE = new AminoAcid("V" , "Valine", "Val");
@ -177,24 +178,17 @@ public class AminoAcidTable {
mitochondrialAminoAcidTable.put("TGA", TRYPTOPHAN); mitochondrialAminoAcidTable.put("TGA", TRYPTOPHAN);
} }
/** /**
* Returns the amino acid encoded by the given codon in a eukaryotic genome. * Returns the amino acid encoded by the given codon in a eukaryotic genome.
* *
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive. * @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
* *
* @return The amino acid matching the given codon. * @return The amino acid matching the given codon, or the UNKNOWN amino acid if the codon string doesn't match anything
*/ */
public static AminoAcid getEukaryoticAA(String codon) { public static AminoAcid getEukaryoticAA(String codon) {
codon = codon.toUpperCase(); codon = codon.toUpperCase();
final AminoAcid aa = aminoAcidTable.get(codon); final AminoAcid aa = aminoAcidTable.get(codon);
if(aa == null) { return aa == null ? UNKNOWN : aa;
throw new IllegalArgumentException("Invalid codon: " + codon);
} else {
return aa;
}
} }
@ -204,13 +198,13 @@ public class AminoAcidTable {
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive. * @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
* @param isFirstCodon If this is the 1st codon in the gene, then "ATT" encodes Methionine * @param isFirstCodon If this is the 1st codon in the gene, then "ATT" encodes Methionine
* *
* @return The amino acid matching the given codon in mitochondrial genes. * @return The amino acid matching the given codon in mitochondrial genes, or the UNKNOWN amino acid if the codon string doesn't match anything
*/ */
public static AminoAcid getMitochondrialAA(String codon, boolean isFirstCodon) { public static AminoAcid getMitochondrialAA(String codon, boolean isFirstCodon) {
codon = codon.toUpperCase(); codon = codon.toUpperCase();
final AminoAcid aa = mitochondrialAminoAcidTable.get(codon); final AminoAcid aa = mitochondrialAminoAcidTable.get(codon);
if(aa == null) { if(aa == null) {
throw new IllegalArgumentException("Invalid codon: " + codon); return UNKNOWN;
} else if(isFirstCodon && codon.equals("ATT")) { } else if(isFirstCodon && codon.equals("ATT")) {
return METHIONINE; //special case - 'ATT' in the first codon of a mitochondrial gene codes for methionine instead of isoleucine return METHIONINE; //special case - 'ATT' in the first codon of a mitochondrial gene codes for methionine instead of isoleucine
} else { } else {

View File

@ -753,17 +753,22 @@ public class TranscriptToInfo extends RodWalker<TreeMap<String, String>, TreeMap
else if(positionType == PositionType.CDS) else if(positionType == PositionType.CDS)
{ {
final String referenceCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2]; final String referenceCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
outputLineFields.put(OUTPUT_FRAME, Integer.toString(frame) ); final String variantCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
outputLineFields.put(OUTPUT_CODON_NUMBER, Integer.toString(codonCount_from5) );
final AminoAcid refAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( referenceCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( referenceCodon ) ; final AminoAcid refAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( referenceCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( referenceCodon ) ;
final AminoAcid variantAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( variantCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( variantCodon ) ;
if ( refAA.isUnknown() || variantAA.isUnknown() ) {
logger.warn("Illegal amino acid detected: refCodon=" + referenceCodon + " altCodon=" + variantCodon);
}
outputLineFields.put(OUTPUT_FRAME, Integer.toString(frame) );
outputLineFields.put(OUTPUT_CODON_NUMBER, Integer.toString(codonCount_from5) );
outputLineFields.put(OUTPUT_REFERENCE_CODON, referenceCodon ); outputLineFields.put(OUTPUT_REFERENCE_CODON, referenceCodon );
outputLineFields.put(OUTPUT_REFERENCE_AA, refAA.getCode()); outputLineFields.put(OUTPUT_REFERENCE_AA, refAA.getCode());
final char temp = currentCodon_5to3[frame]; final char temp = currentCodon_5to3[frame];
currentCodon_5to3[frame] = haplotypeAlternate; currentCodon_5to3[frame] = haplotypeAlternate;
final String variantCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
final AminoAcid variantAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( variantCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( variantCodon ) ;
outputLineFields.put(OUTPUT_VARIANT_CODON, variantCodon ); outputLineFields.put(OUTPUT_VARIANT_CODON, variantCodon );
outputLineFields.put(OUTPUT_VARIANT_AA, variantAA.getCode()); outputLineFields.put(OUTPUT_VARIANT_AA, variantAA.getCode());