diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/BLOSUMMatrix.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/BLOSUMMatrix.java new file mode 100755 index 000000000..53211146c --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/BLOSUMMatrix.java @@ -0,0 +1,178 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.varianteval; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; +import org.broadinstitute.sting.playground.utils.report.utils.TableType; +import org.broadinstitute.sting.utils.StingException; + +import java.util.ArrayList; + +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author depristo + * @since Apr 11, 2010 + */ + +@Analysis(name = "BLOSUM Matrix", description = "Calculates the BLOSUM Matrix for coding variants") +public class BLOSUMMatrix extends VariantEvaluator { + + + // a mapping from quality score histogram bin to Ti/Tv ratio + @DataPoint(name="TiTv by AC", description = "TiTv by allele count") + MetricsByAc metrics = null; + + //@DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count") + //AlleleCountStats alleleCountStats = null; + + private final static Object[] METRIC_COLUMNS = {"AC", "nTi", "nTv", "n", "Ti/Tv"}; + + class MetricsAtAC { + public int ac = -1, nTi = 0, nTv = 0; + + public MetricsAtAC(int ac) { this.ac = ac; } + + public void update(VariantContext eval) { + if ( eval.isTransition() ) + nTi++; + else + nTv++; + } + + // corresponding to METRIC_COLUMNS + public String getColumn(int i) { + switch (i) { + case 0: return String.valueOf(ac); + case 1: return String.valueOf(nTi); + case 2: return String.valueOf(nTv); + case 3: return String.valueOf(nTi + nTv); + case 4: return String.valueOf(ratio(nTi, nTv)); + default: + throw new StingException("Unexpected column " + i); + } + } + } + + class MetricsByAc implements TableType { + ArrayList metrics = new ArrayList(); + Object[] rows = null; + + public MetricsByAc( int nchromosomes ) { + rows = new Object[nchromosomes+1]; + metrics = new ArrayList(nchromosomes+1); + for ( int i = 0; i < nchromosomes + 1; i++ ) { 
+ metrics.add(new MetricsAtAC(i)); + rows[i] = "ac" + i; + } + } + + public Object[] getRowKeys() { + return rows; + } + + public Object[] getColumnKeys() { + return METRIC_COLUMNS; + } + + public String getName() { + return "MetricsByAc"; + } + + // + public String getCell(int ac, int y) { + return metrics.get(ac).getColumn(y); + } + + public String toString() { + String returnString = ""; + return returnString; + } + + public void incrValue( VariantContext eval ) { + int ac = -1; + + if ( eval.hasGenotypes() ) + ac = eval.getChromosomeCount(eval.getAlternateAllele(0)); + else if ( eval.hasAttribute("AC") ) { + ac = Integer.valueOf(eval.getAttributeAsString("AC")); + } + + if ( ac != -1 ) + metrics.get(ac).update(eval); + } + } + + public BLOSUMMatrix(VariantEvalWalker parent) { + super(parent); + // don't do anything + } + + public String getName() { + return "SimpleMetricsByAC"; + } + + public int getComparisonOrder() { + return 1; // we only need to see each eval track + } + + public boolean enabled() { + return false; + } + + public String toString() { + return getName(); + } + + public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + final String interesting = null; + + if (eval != null ) { + if ( metrics == null ) { + int nSamples = this.getVEWalker().getNSamplesForEval(eval); + if ( nSamples != -1 ) + metrics = new MetricsByAc(2 * nSamples); + } + + if ( eval.isSNP() && + eval.isBiallelic() && + metrics != null ) { + metrics.incrValue(eval); + } + } + + return interesting; // This module doesn't capture any interesting sites, so return null + } + + //public void finalizeEvaluation() { + // + //} +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/analysis/AminoAcid.java b/java/src/org/broadinstitute/sting/utils/analysis/AminoAcid.java new file mode 100644 index 000000000..daade253d --- /dev/null +++ 
b/java/src/org/broadinstitute/sting/utils/analysis/AminoAcid.java @@ -0,0 +1,89 @@ +package org.broadinstitute.sting.utils.analysis; + +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
/**
 * The amino acids together with the codons (DNA alphabet, upper case) listed for each,
 * plus a pseudo-entry for stop codons.
 *
 * @author chartl
 * @since June 28, 2010
 */
public enum AminoAcid {

    Alanine("Alanine","Ala","A",new String[]{"GCA","GCC","GCG","GCT"}),
    // The constant keeps its historical spelling so existing references still compile;
    // only the displayed name is corrected.
    Arganine("Arginine","Arg","R",new String[]{"AGA","AGG","CGA","CGC","CGG","CGT"}),
    Asparagine("Asparagine","Asn","N",new String[]{"AAC","AAT"}),
    Aspartic_acid("Aspartic acid","Asp","D",new String[]{"GAT","GAC"}),
    // Previously listed "TGC" twice, leaving TGT unmapped.
    Cysteine("Cysteine","Cys","C",new String[]{"TGC","TGT"}),
    Glutamic_acid("Glutamic acid","Glu","E",new String[]{"GAA","GAG"}),
    Glutamine("Glutamine","Gln","Q",new String[]{"CAA","CAG"}),
    Glycine("Glycine","Gly","G",new String[]{"GGA","GGC","GGG","GGT"}),
    Histidine("Histidine","His","H",new String[]{"CAC","CAT"}),
    Isoleucine("Isoleucine","Ile","I",new String[]{"ATA","ATC","ATT"}),
    Leucine("Leucine","Leu","L",new String[]{"CTA","CTC","CTG","CTT","TTA","TTG"}),
    Lysine("Lysine","Lys","K", new String[]{"AAA","AAG"}),
    Methionine("Methionine","Met","M",new String[]{"ATG"}),
    Phenylalanine("Phenylalanine","Phe","F",new String[]{"TTC","TTT"}),
    Proline("Proline","Pro","P",new String[]{"CCA","CCC","CCG","CCT"}),
    Serine("Serine","Ser","S",new String[]{"AGC","AGT","TCA","TCC","TCG","TCT"}),
    Stop_codon("Stop codon","Stop","*",new String[]{"TAA","TAG","TGA"}),
    Threonine("Threonine","Thr","T",new String[]{"ACA","ACC","ACG","ACT"}),
    Tryptophan("Tryptophan","Trp","W",new String[]{"TGG"}),
    Tyrosine("Tyrosine","Tyr","Y",new String[]{"TAC","TAT"}),
    Valine("Valine","Val","V",new String[]{"GTA","GTC","GTG","GTT"});

    // Package-visible on purpose: AminoAcidTable reads codons directly.
    final String[] codons;
    final String fullName;
    final String code;
    final String letter;

    /**
     * @param name      full display name
     * @param shortName three-letter code ("Stop" for the stop pseudo-entry)
     * @param abbrev    one-letter symbol ("*" for stop)
     * @param myCodons  codons that map to this entry
     */
    AminoAcid(String name, String shortName, String abbrev, String[] myCodons) {
        codons = myCodons;
        fullName = name;
        code = shortName;
        letter = abbrev;
    }

    /** @return the full display name, e.g. "Aspartic acid" */
    public String getName() {
        return fullName;
    }

    /** @return the one-letter symbol, e.g. "D" */
    public String getLetter() {
        return letter;
    }

    /** @return the short code, e.g. "Asp" */
    public String getCode() {
        return code;
    }

    /** @return true only for the stop-codon pseudo-entry */
    public boolean isStop() {
        return this == Stop_codon;
    }

    /** @return the same string as {@link #getName()} */
    public String toString() {
        return getName();
    }

}
+ */ + +/** + * @author chartl + * @since June 28, 2010 + */ + +public class AminoAcidTable { + public HashMap tableByCodon = new HashMap(21); + public HashMap tableByCode = new HashMap(21); + public AminoAcidTable() { + for ( AminoAcid acid : AminoAcid.values() ) { + tableByCode.put(acid.getCode(),acid); + for ( String codon : acid.codons ) { + tableByCodon.put(codon,acid); + } + } + } + + // todo -- these functions are for the genomic annotator and are named too generally -- they are + // todo -- actually accessors by codon; thus should be more specific. + public AminoAcid getEukaryoticAA(String codon) { + return tableByCodon.get(codon.toUpperCase()); + } + + public AminoAcid getMitochondrialAA(String codon, boolean isFirst) { + String upperCodon = codon.toUpperCase(); + if ( isFirst && upperCodon.equals("ATT") || upperCodon.equals("ATA") ) { + return AminoAcid.Methionine; + } else if ( upperCodon.equals("AGA") || upperCodon.equals("AGG") ) { + return AminoAcid.Stop_codon; + } else if ( upperCodon.equals("TGA") ) { + return AminoAcid.Tryptophan; + } else { + return tableByCodon.get(upperCodon); + } + } + + public AminoAcid getAminoAcidByCode(String code) { + return tableByCode.get(code); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/analysis/AminoAcidUtils.java b/java/src/org/broadinstitute/sting/utils/analysis/AminoAcidUtils.java new file mode 100644 index 000000000..7bb078468 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/analysis/AminoAcidUtils.java @@ -0,0 +1,52 @@ +package org.broadinstitute.sting.utils.analysis; + +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit 
persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author chartl + * @since June 28, 2010 + */ + +public class AminoAcidUtils { + + public static String[] getAminoAcidNames() { + String[] names = new String[AminoAcid.values().length]; + for ( AminoAcid acid : AminoAcid.values() ) { + names[acid.ordinal()] = acid.getName(); + } + + return names; + } + + public static String[] getAminoAcidCodes() { + String[] codes = new String[AminoAcid.values().length]; + for ( AminoAcid acid : AminoAcid.values() ) { + codes[acid.ordinal()] = acid.getCode(); + } + + return codes; + } +}