Potential bug fix for bad references where some codons may have Ns
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4075 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
121b4f23b6
commit
b6989289fc
|
|
@ -82,6 +82,11 @@ public class AminoAcid {
|
||||||
return "*".equals(getLetter());
|
return "*".equals(getLetter());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns true if the amino acid is really just a stop codon. */
|
||||||
|
public boolean isUnknown() {
|
||||||
|
return "X".equals(getLetter());
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import java.util.HashMap;
|
||||||
public class AminoAcidTable {
|
public class AminoAcidTable {
|
||||||
|
|
||||||
|
|
||||||
|
protected static final AminoAcid UNKNOWN = new AminoAcid("X" , "Unknown", "Unk");
|
||||||
protected static final AminoAcid ISOLEUCINE = new AminoAcid("I" , "Isoleucine", "Ile");
|
protected static final AminoAcid ISOLEUCINE = new AminoAcid("I" , "Isoleucine", "Ile");
|
||||||
protected static final AminoAcid LEUCINE = new AminoAcid("L" , "Leucine", "Leu");
|
protected static final AminoAcid LEUCINE = new AminoAcid("L" , "Leucine", "Leu");
|
||||||
protected static final AminoAcid VALINE = new AminoAcid("V" , "Valine", "Val");
|
protected static final AminoAcid VALINE = new AminoAcid("V" , "Valine", "Val");
|
||||||
|
|
@ -177,24 +178,17 @@ public class AminoAcidTable {
|
||||||
mitochondrialAminoAcidTable.put("TGA", TRYPTOPHAN);
|
mitochondrialAminoAcidTable.put("TGA", TRYPTOPHAN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the amino acid encoded by the given codon in a eukaryotic genome.
|
* Returns the amino acid encoded by the given codon in a eukaryotic genome.
|
||||||
*
|
*
|
||||||
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
|
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
|
||||||
*
|
*
|
||||||
* @return The amino acid matching the given codon.
|
* @return The amino acid matching the given codon, or the UNKNOWN amino acid if the codon string doesn't match anything
|
||||||
*/
|
*/
|
||||||
public static AminoAcid getEukaryoticAA(String codon) {
|
public static AminoAcid getEukaryoticAA(String codon) {
|
||||||
codon = codon.toUpperCase();
|
codon = codon.toUpperCase();
|
||||||
final AminoAcid aa = aminoAcidTable.get(codon);
|
final AminoAcid aa = aminoAcidTable.get(codon);
|
||||||
if(aa == null) {
|
return aa == null ? UNKNOWN : aa;
|
||||||
throw new IllegalArgumentException("Invalid codon: " + codon);
|
|
||||||
} else {
|
|
||||||
return aa;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -204,13 +198,13 @@ public class AminoAcidTable {
|
||||||
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
|
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
|
||||||
* @param isFirstCodon If this is the 1st codon in the gene, then "ATT" encodes Methyonine
|
* @param isFirstCodon If this is the 1st codon in the gene, then "ATT" encodes Methyonine
|
||||||
*
|
*
|
||||||
* @return The amino acid matching the given codon in mitochondrial genes.
|
* @return The amino acid matching the given codon in mitochondrial genes, or the UNKNOWN amino acid if the codon string doesn't match anything
|
||||||
*/
|
*/
|
||||||
public static AminoAcid getMitochondrialAA(String codon, boolean isFirstCodon) {
|
public static AminoAcid getMitochondrialAA(String codon, boolean isFirstCodon) {
|
||||||
codon = codon.toUpperCase();
|
codon = codon.toUpperCase();
|
||||||
final AminoAcid aa = mitochondrialAminoAcidTable.get(codon);
|
final AminoAcid aa = mitochondrialAminoAcidTable.get(codon);
|
||||||
if(aa == null) {
|
if(aa == null) {
|
||||||
throw new IllegalArgumentException("Invalid codon: " + codon);
|
return UNKNOWN;
|
||||||
} else if(isFirstCodon && codon.equals("ATT")) {
|
} else if(isFirstCodon && codon.equals("ATT")) {
|
||||||
return METHIONINE; //special case - 'ATT' in the first codon of a mitochondrial gene codes for methionine instead of isoleucine
|
return METHIONINE; //special case - 'ATT' in the first codon of a mitochondrial gene codes for methionine instead of isoleucine
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -620,10 +620,10 @@ public class TranscriptToInfo extends RodWalker<TreeMap<String, String>, TreeMap
|
||||||
|
|
||||||
//check for bad bases
|
//check for bad bases
|
||||||
if( (utr5NucBuffer_5to3[0] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[0])) ||
|
if( (utr5NucBuffer_5to3[0] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[0])) ||
|
||||||
(utr5NucBuffer_5to3[1] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[1])) ||
|
(utr5NucBuffer_5to3[1] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[1])) ||
|
||||||
(utr5NucBuffer_5to3[2] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[2])) ||
|
(utr5NucBuffer_5to3[2] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[2])) ||
|
||||||
(utr5NucBuffer_5to3[3] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[3])) ||
|
(utr5NucBuffer_5to3[3] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[3])) ||
|
||||||
(utr5NucBuffer_5to3[4] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[4])))
|
(utr5NucBuffer_5to3[4] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[4])))
|
||||||
{
|
{
|
||||||
logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". utr5NucBuffer_5to3 contains irregular base:" + utr5NucBuffer_5to3[0] + utr5NucBuffer_5to3[1] + utr5NucBuffer_5to3[2] + utr5NucBuffer_5to3[3] + utr5NucBuffer_5to3[4]);// +". Transcript is: " + parsedTranscriptRod);
|
logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". utr5NucBuffer_5to3 contains irregular base:" + utr5NucBuffer_5to3[0] + utr5NucBuffer_5to3[1] + utr5NucBuffer_5to3[2] + utr5NucBuffer_5to3[3] + utr5NucBuffer_5to3[4]);// +". Transcript is: " + parsedTranscriptRod);
|
||||||
++skippedPositionsCounter;
|
++skippedPositionsCounter;
|
||||||
|
|
@ -735,8 +735,8 @@ public class TranscriptToInfo extends RodWalker<TreeMap<String, String>, TreeMap
|
||||||
//check for +1 (eg. addition of new ATG uORF) and -1 (eg. disruption of existing ATG uORF)
|
//check for +1 (eg. addition of new ATG uORF) and -1 (eg. disruption of existing ATG uORF)
|
||||||
String uORFChangeStr = null;
|
String uORFChangeStr = null;
|
||||||
if( (refCodon1.equals("ATG") && !varCodon1.equals("ATG")) ||
|
if( (refCodon1.equals("ATG") && !varCodon1.equals("ATG")) ||
|
||||||
(refCodon2.equals("ATG") && !varCodon2.equals("ATG")) ||
|
(refCodon2.equals("ATG") && !varCodon2.equals("ATG")) ||
|
||||||
(refCodon3.equals("ATG") && !varCodon3.equals("ATG")))
|
(refCodon3.equals("ATG") && !varCodon3.equals("ATG")))
|
||||||
{
|
{
|
||||||
uORFChangeStr = "-1";
|
uORFChangeStr = "-1";
|
||||||
}
|
}
|
||||||
|
|
@ -753,17 +753,22 @@ public class TranscriptToInfo extends RodWalker<TreeMap<String, String>, TreeMap
|
||||||
else if(positionType == PositionType.CDS)
|
else if(positionType == PositionType.CDS)
|
||||||
{
|
{
|
||||||
final String referenceCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
|
final String referenceCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
|
||||||
outputLineFields.put(OUTPUT_FRAME, Integer.toString(frame) );
|
final String variantCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
|
||||||
outputLineFields.put(OUTPUT_CODON_NUMBER, Integer.toString(codonCount_from5) );
|
|
||||||
|
|
||||||
final AminoAcid refAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( referenceCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( referenceCodon ) ;
|
final AminoAcid refAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( referenceCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( referenceCodon ) ;
|
||||||
|
final AminoAcid variantAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( variantCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( variantCodon ) ;
|
||||||
|
|
||||||
|
if ( refAA.isUnknown() || variantAA.isUnknown() ) {
|
||||||
|
logger.warn("Illegal amino acid detected: refCodon=" + referenceCodon + " altCodon=" + variantCodon);
|
||||||
|
}
|
||||||
|
|
||||||
|
outputLineFields.put(OUTPUT_FRAME, Integer.toString(frame) );
|
||||||
|
outputLineFields.put(OUTPUT_CODON_NUMBER, Integer.toString(codonCount_from5) );
|
||||||
outputLineFields.put(OUTPUT_REFERENCE_CODON, referenceCodon );
|
outputLineFields.put(OUTPUT_REFERENCE_CODON, referenceCodon );
|
||||||
outputLineFields.put(OUTPUT_REFERENCE_AA, refAA.getCode());
|
outputLineFields.put(OUTPUT_REFERENCE_AA, refAA.getCode());
|
||||||
|
|
||||||
final char temp = currentCodon_5to3[frame];
|
final char temp = currentCodon_5to3[frame];
|
||||||
currentCodon_5to3[frame] = haplotypeAlternate;
|
currentCodon_5to3[frame] = haplotypeAlternate;
|
||||||
final String variantCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2];
|
|
||||||
final AminoAcid variantAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( variantCodon, codonCount_from5 == 1 ) : AminoAcidTable.getEukaryoticAA( variantCodon ) ;
|
|
||||||
outputLineFields.put(OUTPUT_VARIANT_CODON, variantCodon );
|
outputLineFields.put(OUTPUT_VARIANT_CODON, variantCodon );
|
||||||
outputLineFields.put(OUTPUT_VARIANT_AA, variantAA.getCode());
|
outputLineFields.put(OUTPUT_VARIANT_AA, variantAA.getCode());
|
||||||
|
|
||||||
|
|
@ -915,23 +920,23 @@ public class TranscriptToInfo extends RodWalker<TreeMap<String, String>, TreeMap
|
||||||
long key;
|
long key;
|
||||||
final char lastChromChar = txChrom.charAt(txChrom.length() -1);
|
final char lastChromChar = txChrom.charAt(txChrom.length() -1);
|
||||||
switch( Character.toLowerCase(lastChromChar) ) {
|
switch( Character.toLowerCase(lastChromChar) ) {
|
||||||
case 'm':
|
case 'm':
|
||||||
case 't': // for hg19 or b36
|
case 't': // for hg19 or b36
|
||||||
key = 0;
|
key = 0;
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
key = 23;
|
key = 23;
|
||||||
break;
|
break;
|
||||||
case 'y':
|
case 'y':
|
||||||
key = 24;
|
key = 24;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if( txChrom.startsWith("chr") ) {
|
if( txChrom.startsWith("chr") ) {
|
||||||
key = Integer.parseInt(txChrom.substring(3));
|
key = Integer.parseInt(txChrom.substring(3));
|
||||||
} else {
|
} else {
|
||||||
key = Integer.parseInt(txChrom);
|
key = Integer.parseInt(txChrom);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
key++; //shift so there's no zero (otherwise, multiplication is screwed up in the next step)
|
key++; //shift so there's no zero (otherwise, multiplication is screwed up in the next step)
|
||||||
|
|
@ -1108,8 +1113,8 @@ public class TranscriptToInfo extends RodWalker<TreeMap<String, String>, TreeMap
|
||||||
//exonFrames = new int[exonCount];
|
//exonFrames = new int[exonCount];
|
||||||
for(int i = 0; i < exonCount; i++) {
|
for(int i = 0; i < exonCount; i++) {
|
||||||
exonStarts[i] = Integer.parseInt(exonStartStrs[i]);
|
exonStarts[i] = Integer.parseInt(exonStartStrs[i]);
|
||||||
exonEnds[i] = Integer.parseInt(exonEndStrs[i]);
|
exonEnds[i] = Integer.parseInt(exonEndStrs[i]);
|
||||||
//exonFrames[i] = Integer.parseInt(exonFrameStrs[i]);
|
//exonFrames[i] = Integer.parseInt(exonFrameStrs[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue