Initial commit of items for analyzing amino acid transitions in variant eval. Blew up my subversion by coding locally while I did not have internet. I hope this doesn't bust any integration tests, since I changed no existing code, but... who knows. Crossing my fingers.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3672 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-06-29 20:57:18 +00:00
parent e3fb4d5c70
commit 3017f82550
4 changed files with 388 additions and 0 deletions

View File

@ -0,0 +1,178 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
import org.broadinstitute.sting.utils.StingException;
import java.util.ArrayList;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Variant evaluation module that tallies transitions (Ti) and transversions (Tv)
 * of biallelic SNPs, binned by alternate-allele count.
 *
 * NOTE(review): the annotation advertises a BLOSUM matrix, but the code in this
 * class only produces the Ti/Tv-by-allele-count table, and {@link #getName()}
 * reports "SimpleMetricsByAC" -- presumably a starting template; confirm before
 * enabling the module.
 *
 * @author depristo
 * @since Apr 11, 2010
 */
@Analysis(name = "BLOSUM Matrix", description = "Calculates the BLOSUM Matrix for coding variants")
public class BLOSUMMatrix extends VariantEvaluator {
    // Ti/Tv counts indexed by allele count; remains null until update1 obtains a sample count
    @DataPoint(name="TiTv by AC", description = "TiTv by allele count")
    MetricsByAc metrics = null;

    // column headers of the table; positions line up with MetricsAtAC.getColumn(int)
    private final static Object[] METRIC_COLUMNS = {"AC", "nTi", "nTv", "n", "Ti/Tv"};

    /** Ti/Tv tallies for a single allele count. */
    class MetricsAtAC {
        public int ac = -1;
        public int nTi = 0;
        public int nTv = 0;

        public MetricsAtAC(int ac) {
            this.ac = ac;
        }

        /** Adds one observation: a transition bumps nTi, anything else bumps nTv. */
        public void update(VariantContext eval) {
            final boolean transition = eval.isTransition();
            if ( transition ) {
                nTi++;
            } else {
                nTv++;
            }
        }

        /**
         * Renders one cell of this row.
         *
         * @param i column index, in the order of METRIC_COLUMNS
         * @return the cell value as text
         * @throws StingException if i is not a known column index
         */
        public String getColumn(int i) {
            if ( i == 0 ) {
                return Integer.toString(ac);
            }
            if ( i == 1 ) {
                return Integer.toString(nTi);
            }
            if ( i == 2 ) {
                return Integer.toString(nTv);
            }
            if ( i == 3 ) {
                return Integer.toString(nTi + nTv);
            }
            if ( i == 4 ) {
                // ratio(...) is not declared in this class; presumably inherited from VariantEvaluator
                return String.valueOf(ratio(nTi, nTv));
            }
            throw new StingException("Unexpected column " + i);
        }
    }

    /** Table with one row per allele count, from 0 up to and including the chromosome count. */
    class MetricsByAc implements TableType {
        ArrayList<MetricsAtAC> metrics;
        Object[] rows;

        /**
         * @param nchromosomes number of chromosomes; rows "ac0" .. "ac&lt;nchromosomes&gt;" are created
         */
        public MetricsByAc( int nchromosomes ) {
            final int nBins = nchromosomes + 1;
            rows = new Object[nBins];
            metrics = new ArrayList<MetricsAtAC>(nBins);
            for ( int bin = 0; bin < nBins; bin++ ) {
                metrics.add(new MetricsAtAC(bin));
                rows[bin] = "ac" + bin;
            }
        }

        public Object[] getRowKeys() {
            return rows;
        }

        public Object[] getColumnKeys() {
            return METRIC_COLUMNS;
        }

        public String getName() {
            return "MetricsByAc";
        }

        /** Returns the cell for row (allele count) ac and column y. */
        public String getCell(int ac, int y) {
            final MetricsAtAC row = metrics.get(ac);
            return row.getColumn(y);
        }

        /** Always yields the empty string. */
        public String toString() {
            return "";
        }

        /**
         * Records eval in the row for its allele count. The count is taken from the
         * genotypes when present, otherwise from the "AC" attribute; when neither is
         * available (or the count is -1) the variant is ignored.
         */
        public void incrValue( VariantContext eval ) {
            final int ac;
            if ( eval.hasGenotypes() ) {
                ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
            } else if ( eval.hasAttribute("AC") ) {
                ac = Integer.parseInt(eval.getAttributeAsString("AC"));
            } else {
                return;
            }

            if ( ac == -1 ) {
                return;
            }
            metrics.get(ac).update(eval);
        }
    }

    /** Nothing to set up beyond the parent's own initialisation. */
    public BLOSUMMatrix(VariantEvalWalker parent) {
        super(parent);
    }

    public String getName() {
        return "SimpleMetricsByAC";
    }

    public int getComparisonOrder() {
        // we only need to see each eval track
        return 1;
    }

    /** This module always reports itself as disabled. */
    public boolean enabled() {
        return false;
    }

    public String toString() {
        return getName();
    }

    /**
     * Feeds one eval record into the allele-count table.
     *
     * The table is created on the first non-null record for which the walker
     * reports a sample count other than -1, with 2 * nSamples chromosomes.
     * Only biallelic SNPs are counted.
     *
     * @return always null; this module never flags a site as interesting
     */
    public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( eval == null ) {
            return null;
        }

        if ( metrics == null ) {
            final int nSamples = this.getVEWalker().getNSamplesForEval(eval);
            if ( nSamples != -1 ) {
                metrics = new MetricsByAc(2 * nSamples);
            }
        }

        final boolean countable = eval.isSNP() && eval.isBiallelic() && metrics != null;
        if ( countable ) {
            metrics.incrValue(eval);
        }
        return null;
    }
}

View File

@ -0,0 +1,89 @@
package org.broadinstitute.sting.utils.analysis;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * The twenty standard amino acids plus a pseudo-entry for stop codons. Each
 * constant carries a full name, a three-letter code, a one-letter symbol and
 * the DNA codons (upper case, T rather than U) listed for it.
 *
 * @author chartl
 * @since June 28, 2010
 */
public enum AminoAcid {
    Alanine("Alanine","Ala","A",new String[]{"GCA","GCC","GCG","GCT"}),
    // The constant keeps its original (misspelled) identifier so existing references
    // still compile; only the human-readable name is spelled correctly.
    Arganine("Arginine","Arg","R",new String[]{"AGA","AGG","CGA","CGC","CGG","CGT"}),
    Asparagine("Asparagine","Asn","N",new String[]{"AAC","AAT"}),
    Aspartic_acid("Aspartic acid","Asp","D",new String[]{"GAT","GAC"}),
    // Cysteine is encoded by TGC and TGT (the list previously repeated TGC and omitted TGT)
    Cysteine("Cysteine","Cys","C",new String[]{"TGC","TGT"}),
    Glutamic_acid("Glutamic acid","Glu","E",new String[]{"GAA","GAG"}),
    Glutamine("Glutamine","Gln","Q",new String[]{"CAA","CAG"}),
    Glycine("Glycine","Gly","G",new String[]{"GGA","GGC","GGG","GGT"}),
    Histidine("Histidine","His","H",new String[]{"CAC","CAT"}),
    Isoleucine("Isoleucine","Ile","I",new String[]{"ATA","ATC","ATT"}),
    Leucine("Leucine","Leu","L",new String[]{"CTA","CTC","CTG","CTT","TTA","TTG"}),
    Lysine("Lysine","Lys","K", new String[]{"AAA","AAG"}),
    Methionine("Methionine","Met","M",new String[]{"ATG"}),
    Phenylalanine("Phenylalanine","Phe","F",new String[]{"TTC","TTT"}),
    Proline("Proline","Pro","P",new String[]{"CCA","CCC","CCG","CCT"}),
    Serine("Serine","Ser","S",new String[]{"AGC","AGT","TCA","TCC","TCG","TCT"}),
    Stop_codon("Stop codon","Stop","*",new String[]{"TAA","TAG","TGA"}),
    Threonine("Threonine","Thr","T",new String[]{"ACA","ACC","ACG","ACT"}),
    Tryptophan("Tryptophan","Trp","W",new String[]{"TGG"}),
    Tyrosine("Tyrosine","Tyr","Y",new String[]{"TAC","TAT"}),
    Valine("Valine","Val","V",new String[]{"GTA","GTC","GTG","GTT"});

    // Package-visible on purpose: AminoAcidTable iterates over codons directly.
    final String[] codons;
    final String fullName;
    final String code;
    final String letter;

    /**
     * @param name      full name returned by {@link #getName()}
     * @param shortName three-letter code returned by {@link #getCode()}
     * @param abbrev    one-letter symbol returned by {@link #getLetter()}
     * @param myCodons  codons listed for this entry
     */
    AminoAcid(String name, String shortName, String abbrev, String[] myCodons) {
        codons = myCodons;
        fullName = name;
        code = shortName;
        letter = abbrev;
    }

    /** @return the full name, e.g. "Aspartic acid" */
    public String getName() {
        return fullName;
    }

    /** @return the one-letter symbol, e.g. "D" ("*" for the stop entry) */
    public String getLetter() {
        return letter;
    }

    /** @return the three-letter code, e.g. "Asp" ("Stop" for the stop entry) */
    public String getCode() {
        return code;
    }

    /** @return true only for the {@link #Stop_codon} entry */
    public boolean isStop() {
        return this == Stop_codon;
    }

    /** @return the same text as {@link #getName()} */
    @Override
    public String toString() {
        return getName();
    }
}

View File

@ -0,0 +1,69 @@
package org.broadinstitute.sting.utils.analysis;
import java.util.HashMap;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Lookup tables over {@link AminoAcid}: one keyed by codon, one keyed by
 * three-letter code. Both are filled from the enum when an instance is created.
 *
 * @author chartl
 * @since June 28, 2010
 */
public class AminoAcidTable {
    // Sized so the 64 codons fit without rehashing at the default load factor.
    public HashMap<String,AminoAcid> tableByCodon = new HashMap<String,AminoAcid>(128);
    public HashMap<String,AminoAcid> tableByCode = new HashMap<String,AminoAcid>(32);

    /** Builds both tables from every {@link AminoAcid} constant and its codon list. */
    public AminoAcidTable() {
        for ( AminoAcid acid : AminoAcid.values() ) {
            tableByCode.put(acid.getCode(),acid);
            for ( String codon : acid.codons ) {
                tableByCodon.put(codon,acid);
            }
        }
    }

    // todo -- these functions are for the genomic annotator and are named too generally -- they are
    // todo -- actually accessors by codon; thus should be more specific.

    /**
     * Looks a codon up in the codon table, ignoring case.
     *
     * @param codon three-base codon; may be null
     * @return the matching entry, or null when the codon is null or not in the table
     */
    public AminoAcid getEukaryoticAA(String codon) {
        if ( codon == null ) {
            // treat "no codon" like "unknown codon" instead of failing with a NullPointerException
            return null;
        }
        return tableByCodon.get(codon.toUpperCase());
    }

    /**
     * Looks a codon up with a handful of overrides applied before the codon table.
     * NOTE(review): the overrides look like the vertebrate mitochondrial genetic
     * code -- confirm against the intended translation table.
     *
     * @param codon   three-base codon, case-insensitive; may be null
     * @param isFirst whether this is the first codon; only affects ATT
     * @return Methionine for ATA (always) and for ATT when isFirst is true;
     *         the stop entry for AGA and AGG; Tryptophan for TGA; otherwise the
     *         codon-table entry, or null when the codon is null or unknown
     */
    public AminoAcid getMitochondrialAA(String codon, boolean isFirst) {
        if ( codon == null ) {
            return null;
        }
        String upperCodon = codon.toUpperCase();
        // && binds tighter than ||: isFirst qualifies ATT only, ATA matches unconditionally
        if ( (isFirst && upperCodon.equals("ATT")) || upperCodon.equals("ATA") ) {
            return AminoAcid.Methionine;
        } else if ( upperCodon.equals("AGA") || upperCodon.equals("AGG") ) {
            return AminoAcid.Stop_codon;
        } else if ( upperCodon.equals("TGA") ) {
            return AminoAcid.Tryptophan;
        } else {
            return tableByCodon.get(upperCodon);
        }
    }

    /**
     * Looks an entry up by its three-letter code; the match is case-sensitive.
     *
     * @param code three-letter code such as "Ala"
     * @return the matching entry, or null when there is none
     */
    public AminoAcid getAminoAcidByCode(String code) {
        return tableByCode.get(code);
    }
}

View File

@ -0,0 +1,52 @@
package org.broadinstitute.sting.utils.analysis;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
 * Static helpers that project the {@link AminoAcid} constants onto string arrays.
 *
 * @author chartl
 * @since June 28, 2010
 */
public class AminoAcidUtils {

    /**
     * @return the full name of every {@link AminoAcid}, in declaration order
     */
    public static String[] getAminoAcidNames() {
        final AminoAcid[] acids = AminoAcid.values();
        final String[] result = new String[acids.length];
        // values() is in declaration order, so position i is the constant with ordinal i
        for ( int i = 0; i < acids.length; i++ ) {
            result[i] = acids[i].getName();
        }
        return result;
    }

    /**
     * @return the three-letter code of every {@link AminoAcid}, in declaration order
     */
    public static String[] getAminoAcidCodes() {
        final AminoAcid[] acids = AminoAcid.values();
        final String[] result = new String[acids.length];
        for ( int i = 0; i < acids.length; i++ ) {
            result[i] = acids[i].getCode();
        }
        return result;
    }
}