Removed unused importants, but some of these scripts are now out of date (they have been for a long time) so they don't compile anyway
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5837 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f608ed6d5a
commit
72ad8ded19
|
|
@ -0,0 +1,273 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.report.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.utils.report.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: Nov 22, 2010
|
||||||
|
* Time: 12:22:08 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
@Analysis(name = "ACTransitionMatrix", description = "Number of additional genotypes from each new sample; random permutations")
|
||||||
|
public class ACTransitionTable extends VariantEvaluator {
|
||||||
|
private final int NUM_PERMUTATIONS = 50;
|
||||||
|
private final double LOW_GQ_PCT = 0.95;
|
||||||
|
private final double LOW_GQ_THRSH = 30.0;
|
||||||
|
private boolean initialized = false;
|
||||||
|
private long skipped = 0l;
|
||||||
|
|
||||||
|
@DataPoint(name="Het transitions",description="AC[s] = AC[s-1]+1 and AC[s] = AC[s-1]+2 transitions")
|
||||||
|
TransitionTable transitions = null;
|
||||||
|
@DataPoint(name="Private permutations",description="Marginal increase in number of sites per sample")
|
||||||
|
PermutationCounts privatePermutations;
|
||||||
|
@DataPoint(name="AC2 Permutations",description="Marginal increase in number of AC=2 sites, per sample")
|
||||||
|
PermutationCounts doubletonPermutations;
|
||||||
|
@DataPoint(name="AC3 Permutations",description="Marginal increase in number of tripleton sites, per sample")
|
||||||
|
PermutationCounts tripletonPermutations;
|
||||||
|
|
||||||
|
String[][] permutations;
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "ACTransitionTable";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( eval != null && ! initialized ) {
|
||||||
|
//this.veWalker.getLogger().warn("Initializing...");
|
||||||
|
initialize(eval);
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( isGood(eval) ) {
|
||||||
|
if ( comp != null && ! comp.isFiltered() ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
int order_offset = 0;
|
||||||
|
for ( String[] ordering : permutations ) {
|
||||||
|
int sample_offset = 0;
|
||||||
|
int variant_ac = 0;
|
||||||
|
for ( String sample : ordering ) {
|
||||||
|
if ( eval.getGenotype(sample).isHet() ) {
|
||||||
|
variant_ac++;
|
||||||
|
transitions.hetTransitionCounts[order_offset][variant_ac-1][sample_offset]++;
|
||||||
|
} else if ( eval.getGenotype(sample).isHomVar() ) {
|
||||||
|
variant_ac += 2;
|
||||||
|
transitions.homTransitionCounts[order_offset][variant_ac-1][sample_offset]++;
|
||||||
|
} else {
|
||||||
|
// todo -- note, unclear how to treat no calls. Is the hom in het,ref,ref,nocall,hom sample 4 or 5?
|
||||||
|
// todo -- do we want to tabulate P[sample i is not variant | some variant]? This is just combinatorics so i left it out
|
||||||
|
if ( variant_ac > 0 ) {
|
||||||
|
transitions.stationaryCounts[order_offset][variant_ac-1][sample_offset]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sample_offset ++;
|
||||||
|
}
|
||||||
|
order_offset++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
skipped++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isGood(VariantContext vc) {
|
||||||
|
if ( vc == null || vc.isFiltered() || (vc.getHetCount() + vc.getHomVarCount() == 0) ) { // todo -- should be is variant, but need to ensure no alt alleles at ref sites
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
Collection<Genotype> gtypes = vc.getGenotypes().values();
|
||||||
|
int ngood = 0;
|
||||||
|
for ( Genotype g : gtypes) {
|
||||||
|
if ( g.isCalled() && g.getPhredScaledQual() >= LOW_GQ_THRSH ) {
|
||||||
|
ngood ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ( (0.0+ngood)/(0.0+gtypes.size()) >= LOW_GQ_PCT );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ACTransitionTable(VariantEvalWalker parent) {
|
||||||
|
//super(parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize(VariantContext vc) {
|
||||||
|
Set<String> permuteSamples = vc.getSampleNames();
|
||||||
|
permutations = new String[NUM_PERMUTATIONS][permuteSamples.size()];
|
||||||
|
//veWalker.getLogger().warn(String.format("Num samples: %d",permuteSamples.size()));
|
||||||
|
int offset = 0;
|
||||||
|
for ( String s : permuteSamples ) {
|
||||||
|
permutations[0][offset] = s;
|
||||||
|
offset ++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int p = 1; p < NUM_PERMUTATIONS ; p++ ) {
|
||||||
|
permutations[p] = permutations[0].clone();
|
||||||
|
for ( int o = 0; o < permutations[p].length; o ++ ) {
|
||||||
|
int r = (int) Math.floor(Math.random()*(o+1));
|
||||||
|
String swap = permutations[p][r];
|
||||||
|
permutations[p][r] = permutations[p][o];
|
||||||
|
permutations[p][o] = swap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
transitions = new TransitionTable();
|
||||||
|
transitions.hetTransitionCounts = new int[NUM_PERMUTATIONS][permuteSamples.size()*2][permuteSamples.size()];
|
||||||
|
transitions.homTransitionCounts = new int[NUM_PERMUTATIONS][permuteSamples.size()*2][permuteSamples.size()];
|
||||||
|
transitions.stationaryCounts = new int[NUM_PERMUTATIONS][permuteSamples.size()*2][permuteSamples.size()];
|
||||||
|
privatePermutations = new PermutationCounts(1,transitions);
|
||||||
|
doubletonPermutations = new PermutationCounts(2,transitions);
|
||||||
|
tripletonPermutations = new PermutationCounts(3,transitions);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finalizeEvaluation() { // note: data points are null when this is called (wtf?)
|
||||||
|
//veWalker.getLogger().info(String.format("Skipped: %d",skipped));
|
||||||
|
}
|
||||||
|
|
||||||
|
class TransitionTable implements TableType {
|
||||||
|
int[][][] hetTransitionCounts;
|
||||||
|
int[][][] homTransitionCounts;
|
||||||
|
int[][][] stationaryCounts;
|
||||||
|
String[][] countAverages;
|
||||||
|
String[] rowKeys = null;
|
||||||
|
String[] colKeys = null;
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
if ( rowKeys == null ) {
|
||||||
|
rowKeys = new String[3*hetTransitionCounts[0].length];
|
||||||
|
for ( int i = 0; i < hetTransitionCounts[0].length; i ++ ) {
|
||||||
|
rowKeys[i] = String.format("%s%d%s","AC_",i,"_(het)");
|
||||||
|
}
|
||||||
|
for ( int i = 0; i < hetTransitionCounts[0].length; i ++ ) {
|
||||||
|
rowKeys[hetTransitionCounts[0].length+i] = String.format("%s%d%s","AC_",i,"_(hom)");
|
||||||
|
}
|
||||||
|
for ( int i = 0; i < hetTransitionCounts[0].length; i ++ ) {
|
||||||
|
rowKeys[2*hetTransitionCounts[0].length+i] = String.format("%s%d%s","AC_",i,"_(ref)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return rowKeys;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCell(int x, int y) {
|
||||||
|
if ( countAverages == null ) {
|
||||||
|
countAverages = new String[hetTransitionCounts[0].length*3][hetTransitionCounts[0][0].length];
|
||||||
|
for ( int sam = 0; sam < hetTransitionCounts[0][0].length; sam ++) {
|
||||||
|
for ( int idx = 0 ; idx < hetTransitionCounts[0].length; idx ++ ) {
|
||||||
|
int totalTimesAtACSample = 0;
|
||||||
|
int totalStationary = 0;
|
||||||
|
int totalAC1Shift = 0;
|
||||||
|
int totalAC2Shift = 0;
|
||||||
|
for ( int p = 0; p < hetTransitionCounts.length; p++ ) {
|
||||||
|
totalStationary += stationaryCounts[p][idx][sam];
|
||||||
|
totalAC2Shift += (idx+2 >= hetTransitionCounts[0][0].length) ? 0 : homTransitionCounts[p][idx+2][sam];
|
||||||
|
totalAC1Shift += (idx+1 >= hetTransitionCounts[0][0].length) ? 0 : hetTransitionCounts[p][idx+1][sam];
|
||||||
|
}
|
||||||
|
totalTimesAtACSample = totalStationary+totalAC1Shift+totalAC2Shift;
|
||||||
|
countAverages[idx][sam] = formatProp(totalAC1Shift,totalTimesAtACSample);
|
||||||
|
countAverages[hetTransitionCounts[0].length+idx][sam] = formatProp(totalAC2Shift,totalTimesAtACSample);
|
||||||
|
countAverages[hetTransitionCounts[0].length*2+idx][sam] = formatProp(totalStationary,totalTimesAtACSample);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return countAverages[x][y] == null ? "0.00" : countAverages[x][y];
|
||||||
|
}
|
||||||
|
|
||||||
|
private String formatProp(int num, int denom) {
|
||||||
|
return (denom != 0) ? String.format("%.4f", ((double) num)/denom) : "0.0";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() { return "AC Transition Tables"; }
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
if ( colKeys == null ) {
|
||||||
|
colKeys = new String[hetTransitionCounts[0][0].length];
|
||||||
|
for ( int ac = 0; ac < hetTransitionCounts[0][0].length; ac ++ ) {
|
||||||
|
colKeys[ac] = String.format("Sample_%d",ac);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return colKeys;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PermutationCounts implements TableType {
|
||||||
|
int acToExtract;
|
||||||
|
TransitionTable table;
|
||||||
|
String[] rowNames;
|
||||||
|
String[] colNames;
|
||||||
|
|
||||||
|
public PermutationCounts(int ac, TransitionTable tTable) {
|
||||||
|
acToExtract = ac;
|
||||||
|
table = tTable;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getRowKeys() {
|
||||||
|
//System.out.printf("%s%n",table);
|
||||||
|
if ( rowNames == null ) {
|
||||||
|
rowNames = new String[table.stationaryCounts.length];
|
||||||
|
for ( int p = 0 ; p < rowNames.length; p ++ ) {
|
||||||
|
rowNames[p] = String.format("Perm%d",p+1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rowNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] getColumnKeys() {
|
||||||
|
if ( colNames == null ) {
|
||||||
|
colNames = new String[table.stationaryCounts[0][0].length];
|
||||||
|
for ( int s = 0 ; s < colNames.length; s ++ ) {
|
||||||
|
colNames[s] = String.format("Sample%d",s+1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return colNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getCell(int x, int y) {
|
||||||
|
return table.hetTransitionCounts[x][acToExtract-1][y] +
|
||||||
|
( (acToExtract > table.homTransitionCounts[0][0].length) ? 0 : table.homTransitionCounts[x][acToExtract-1][y]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return String.format("PermutationCountsAC%d",acToExtract);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void init() {
|
||||||
|
getRowKeys();
|
||||||
|
getColumnKeys();
|
||||||
|
getCell(1,1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,212 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.report.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "Allele Frequency Comparison", description = "Compare allele frequency and counts between eval and comp")
|
||||||
|
public class AlleleFrequencyComparison extends VariantEvaluator {
|
||||||
|
private static int MAX_AC_COUNT = 100; // todo -- command line argument?
|
||||||
|
|
||||||
|
@DataPoint(description="Counts of eval frequency versus comp frequency")
|
||||||
|
AFTable afTable = new AFTable();
|
||||||
|
|
||||||
|
@DataPoint(description="Counts of eval AC versus comp AC")
|
||||||
|
ACTable acTable = new ACTable(MAX_AC_COUNT);
|
||||||
|
|
||||||
|
public boolean enabled() { return true; }
|
||||||
|
|
||||||
|
public int getComparisonOrder() { return 2; }
|
||||||
|
|
||||||
|
public String getName() { return "Allele Frequency Comparison"; }
|
||||||
|
|
||||||
|
public AlleleFrequencyComparison(VariantEvalWalker parent) {
|
||||||
|
//super(parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
//public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||||
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( ! (isValidVC(eval) && isValidVC(comp)) ) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
// todo -- this is a godawful hack. The "right way" isn't working, so do it the unsafe way for now. Note that
|
||||||
|
// todo -- this precludes getting the AC/AF values from the info field because some may not be there...
|
||||||
|
/*if ( missingField(eval) ) {
|
||||||
|
recalculateCounts(eval);
|
||||||
|
}
|
||||||
|
if ( missingField(comp) ) {
|
||||||
|
recalculateCounts(comp);
|
||||||
|
}*/
|
||||||
|
HashMap<String,Object> evalCounts = new HashMap<String,Object>(2);
|
||||||
|
HashMap<String,Object> compCounts = new HashMap<String,Object>(2);
|
||||||
|
|
||||||
|
VariantContextUtils.calculateChromosomeCounts(eval,evalCounts,false);
|
||||||
|
VariantContextUtils.calculateChromosomeCounts(comp,compCounts,false);
|
||||||
|
afTable.update(((List<Double>)evalCounts.get("AF")).get(0),((List<Double>)compCounts.get("AF")).get(0));
|
||||||
|
acTable.update(((List<Integer>)evalCounts.get("AC")).get(0),((List<Integer>)compCounts.get("AC")).get(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
return null; // there is nothing interesting
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean missingField(final VariantContext vc) {
|
||||||
|
return ! ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) );
|
||||||
|
}
|
||||||
|
|
||||||
|
private void recalculateCounts(VariantContext vc) {
|
||||||
|
Map<String,Object> attributes = new HashMap<String,Object>();
|
||||||
|
VariantContextUtils.calculateChromosomeCounts(vc,attributes,false);
|
||||||
|
vc = VariantContext.modifyAttributes(vc,attributes);
|
||||||
|
//getLogger().debug(String.format("%s %s | %s %s",attributes.get("AC"),attributes.get("AF"),vc.getAttribute("AC"),vc.getAttribute("AF")));
|
||||||
|
if ( attributes.size() == 2 && missingField(vc) ) {
|
||||||
|
throw new org.broadinstitute.sting.utils.exceptions.StingException("VariantContext should have had attributes modified but did not");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isValidVC(final VariantContext vc) {
|
||||||
|
return (vc != null && !vc.isFiltered() && vc.getAlternateAlleles().size() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static double getAF(VariantContext vc) {
|
||||||
|
Object af = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||||
|
if ( af == null ) {
|
||||||
|
//throw new UserException("Variant context "+vc.getName()+" does not have allele frequency entry which is required for this walker");
|
||||||
|
// still none after being re-computed; this is 0.00
|
||||||
|
return 0.00;
|
||||||
|
} else if ( List.class.isAssignableFrom(af.getClass())) {
|
||||||
|
return ( (List<Double>) af ).get(0);
|
||||||
|
} else if ( String.class.isAssignableFrom(af.getClass())) {
|
||||||
|
// two possibilities
|
||||||
|
String s = (String) af;
|
||||||
|
try {
|
||||||
|
if ( s.startsWith("[") ) {
|
||||||
|
return Double.parseDouble(s.replace("\\[","").replace("\\]",""));
|
||||||
|
} else {
|
||||||
|
return Double.parseDouble(s);
|
||||||
|
}
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
throw new UserException("Allele frequency field may be improperly formatted, found AF="+s,e);
|
||||||
|
}
|
||||||
|
} else if ( Double.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY).getClass())) {
|
||||||
|
return (Double) af;
|
||||||
|
} else {
|
||||||
|
throw new UserException(String.format("Class of Allele Frequency does not appear to be formated, had AF=%s, of class %s",af.toString(),af.getClass()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getAC(VariantContext vc) {
|
||||||
|
Object ac = vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY);
|
||||||
|
if ( ac == null ) {
|
||||||
|
// still none after being re computed; this is 0
|
||||||
|
return 0;
|
||||||
|
} else if ( List.class.isAssignableFrom(ac.getClass())) {
|
||||||
|
return ( (List<Integer>) ac ).get(0);
|
||||||
|
} else if ( String.class.isAssignableFrom(ac.getClass())) {
|
||||||
|
// two possibilities
|
||||||
|
String s = (String) ac;
|
||||||
|
try {
|
||||||
|
if ( s.startsWith("[") ) {
|
||||||
|
return Integer.parseInt(s.replace("\\[","").replace("\\]",""));
|
||||||
|
} else {
|
||||||
|
return Integer.parseInt(s);
|
||||||
|
}
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
throw new UserException(String.format("Allele count field may be improperly formatted, found AC=%s for record %s:%d",ac,vc.getChr(),vc.getStart()),e);
|
||||||
|
}
|
||||||
|
} else if ( Integer.class.isAssignableFrom(ac.getClass())) {
|
||||||
|
return (Integer) ac;
|
||||||
|
} else {
|
||||||
|
throw new UserException(String.format("Class of Allele Frequency does not appear to be formated, had AF=%s, of class %s",ac.toString(),ac.getClass()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class AFTable implements TableType {
|
||||||
|
|
||||||
|
protected int[][] afCounts = new int[101][101];
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
String[] afKeys = new String[101];
|
||||||
|
for ( int f = 0; f < 101; f ++ ) {
|
||||||
|
afKeys[f] = String.format("%.2f",(f+0.0)/100.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return afKeys;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return getRowKeys(); // nice thing about symmetric tables
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCell(int i, int j) {
|
||||||
|
return afCounts[i][j];
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Allele Frequency Concordance";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update(double eval, double comp) {
|
||||||
|
afCounts[af2index(eval)][af2index(comp)]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int af2index(double d) {
|
||||||
|
return (int) Math.round(100*d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class ACTable implements TableType {
|
||||||
|
protected int[][] acCounts;
|
||||||
|
protected int maxAC;
|
||||||
|
|
||||||
|
public ACTable(int acMaximum) {
|
||||||
|
maxAC = acMaximum;
|
||||||
|
acCounts = new int[acMaximum+1][acMaximum+1];
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
String[] acKeys = new String[maxAC+1];
|
||||||
|
for ( int i = 0 ; i <= maxAC ; i ++ ) {
|
||||||
|
acKeys[i] = String.format("%d",i);
|
||||||
|
}
|
||||||
|
|
||||||
|
return acKeys;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return getRowKeys();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCell(int i, int j) {
|
||||||
|
return acCounts[i][j];
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Allele Counts Concordance";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update(int eval, int comp) {
|
||||||
|
eval = eval > maxAC ? maxAC : eval;
|
||||||
|
comp = comp > maxAC ? maxAC : comp;
|
||||||
|
|
||||||
|
acCounts[eval][comp]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,219 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
import org.broadinstitute.sting.utils.analysis.AminoAcid;
|
||||||
|
import org.broadinstitute.sting.utils.analysis.AminoAcidTable;
|
||||||
|
import org.broadinstitute.sting.utils.analysis.AminoAcidUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author chartl
|
||||||
|
* @since June 28, 2010
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "Amino Acid Transition", description = "Calculates the Transition Matrix for coding variants; entries are Total, Num. Ti, Num. Tv, Ratio")
|
||||||
|
public class AminoAcidTransition extends VariantEvaluator {
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
//// INTERNAL DATA POINT CLASSES
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// a mapping from amino acid transition score histogram bin to Ti/Tv ratio
|
||||||
|
@DataPoint(description = "TiTv counts by amino acid change")
|
||||||
|
AminoAcidTiTvTable acidTable = null;
|
||||||
|
|
||||||
|
class TiTvCount {
|
||||||
|
public int ti;
|
||||||
|
public int tv;
|
||||||
|
|
||||||
|
public TiTvCount() {
|
||||||
|
ti = 0;
|
||||||
|
tv = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getTotal() {
|
||||||
|
return ti + tv;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getRatio() {
|
||||||
|
return ( (double) ti )/(1.0+tv);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%d:%d:%d:%.2f",getTotal(),ti,tv,getRatio());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class AminoAcidTiTvTable implements TableType {
|
||||||
|
|
||||||
|
private TiTvCount[][] countsByAAChange;
|
||||||
|
|
||||||
|
public AminoAcidTiTvTable() {
|
||||||
|
countsByAAChange = new TiTvCount[AminoAcid.values().length][AminoAcid.values().length];
|
||||||
|
for ( int i = 0; i < AminoAcid.values().length; i ++ ) {
|
||||||
|
for ( int j = 0; j < AminoAcid.values().length; j++ ) {
|
||||||
|
countsByAAChange[i][j] = new TiTvCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
return AminoAcidUtils.getAminoAcidCodes();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return AminoAcidUtils.getAminoAcidCodes();
|
||||||
|
}
|
||||||
|
|
||||||
|
public TiTvCount getCell(int x, int y) {
|
||||||
|
return countsByAAChange[x][y];
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "AminoAcidTransitionTable";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update(AminoAcid reference, AminoAcid alternate, boolean isTransition) {
|
||||||
|
TiTvCount counter = countsByAAChange[reference.ordinal()][alternate.ordinal()];
|
||||||
|
if ( isTransition ) {
|
||||||
|
counter.ti++;
|
||||||
|
} else {
|
||||||
|
counter.tv++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
//// CORE VARIANT EVALUATOR DATA AND METHODS
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
private String infoKey;
|
||||||
|
private String infoValueSplit;
|
||||||
|
private boolean useCodons;
|
||||||
|
private boolean enabled;
|
||||||
|
private AminoAcidTable lookup;
|
||||||
|
|
||||||
|
public AminoAcidTransition(VariantEvalWalker parent) {
|
||||||
|
//super(parent);
|
||||||
|
//enabled = parent.aminoAcidTransitionKey != null;
|
||||||
|
enabled = true;
|
||||||
|
if ( enabled ) {
|
||||||
|
getParsingInformation(parent);
|
||||||
|
lookup = new AminoAcidTable();
|
||||||
|
acidTable = new AminoAcidTiTvTable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void getParsingInformation(VariantEvalWalker parent) {
|
||||||
|
if ( enabled() ) {
|
||||||
|
// infoKey = parent.aminoAcidTransitionKey;
|
||||||
|
// infoValueSplit = parent.aminoAcidTransitionSplit;
|
||||||
|
// useCodons = parent.aatUseCodons;
|
||||||
|
|
||||||
|
infoKey = null;
|
||||||
|
infoValueSplit = null;
|
||||||
|
useCodons = false;
|
||||||
|
|
||||||
|
if ( infoKey == null ) {
|
||||||
|
throw new UserException.CommandLineException("No info-field key provided for amino acid tabulation. Please provide the appropriate key with -aatk.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( infoValueSplit == null ) {
|
||||||
|
throw new UserException.CommandLineException("No split string provided for amino acid tabulation. Please provide the split string with -aats");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "AminoAcidTransitionTable";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 1; // we only need to see each eval track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return enabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
String interesting = null;
|
||||||
|
//if ( eval != null && eval.hasAttribute(infoKey) ) {
|
||||||
|
if ( enabled && eval != null && eval.hasAttribute(infoKey) ) {
|
||||||
|
String[] parsedNames = ( (String) eval.getAttribute(infoKey)).split(infoValueSplit);
|
||||||
|
String first = "none";
|
||||||
|
String second = "none";
|
||||||
|
try {
|
||||||
|
first = parsedNames [0];
|
||||||
|
second = parsedNames [1];
|
||||||
|
} catch (ArrayIndexOutOfBoundsException e) {
|
||||||
|
//getLogger().warn("Error parsing variant context with value "+eval.getAttribute(infoKey));
|
||||||
|
}
|
||||||
|
AminoAcid reference;
|
||||||
|
AminoAcid alternate;
|
||||||
|
if ( useCodons ) {
|
||||||
|
reference = lookup.getEukaryoticAA(first);
|
||||||
|
alternate = lookup.getEukaryoticAA(second);
|
||||||
|
} else {
|
||||||
|
reference = lookup.getAminoAcidByCode(first);
|
||||||
|
alternate = lookup.getAminoAcidByCode(second);
|
||||||
|
}
|
||||||
|
|
||||||
|
//veWalker.getLogger().info(String.format("%s\t%s\t%s\t%s",first,second,reference,alternate));
|
||||||
|
|
||||||
|
if ( reference == null ) {
|
||||||
|
interesting = "Unknown Reference Codon";
|
||||||
|
} else if ( alternate == null ) {
|
||||||
|
interesting = "Unknown Alternate Codon";
|
||||||
|
} else {
|
||||||
|
acidTable.update(reference,alternate, VariantContextUtils.isTransition(eval));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||||
|
}
|
||||||
|
|
||||||
|
//public void finalizeEvaluation() {
|
||||||
|
//
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
|
@ -7,7 +7,6 @@ import org.broadinstitute.sting.queue.extensions.samtools._
|
||||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
||||||
|
|
||||||
class RefineGenotypesAndMerge extends QScript {
|
class RefineGenotypesAndMerge extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,7 @@ class expanded_targets extends QScript {
|
||||||
eval.rodBind :+= new RodBind("compHiSeq_atSites","vcf",callHiseq.out)
|
eval.rodBind :+= new RodBind("compHiSeq_atSites","vcf",callHiseq.out)
|
||||||
eval.rodBind :+= new RodBind("compOMNI","vcf",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/764samples.deduped.b37.annot.vcf"))
|
eval.rodBind :+= new RodBind("compOMNI","vcf",new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/764samples.deduped.b37.annot.vcf"))
|
||||||
eval.out = swapExt(iList,".interval_list",".eval")
|
eval.out = swapExt(iList,".interval_list",".eval")
|
||||||
eval.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
//eval.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
||||||
eval.memoryLimit = 4
|
eval.memoryLimit = 4
|
||||||
|
|
||||||
add(eval)
|
add(eval)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.queue.extensions.samtools._
|
||||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
||||||
import scala.collection.mutable.HashMap
|
import scala.collection.mutable.HashMap
|
||||||
|
|
||||||
class omni_qc extends QScript {
|
class omni_qc extends QScript {
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ class private_mutations extends QScript {
|
||||||
eval_all.noStandard = true
|
eval_all.noStandard = true
|
||||||
eval_all.E :+= "ACTransitionTable"
|
eval_all.E :+= "ACTransitionTable"
|
||||||
eval_all.out = swapExt(finalMergedVCF,".vcf",".perm.csv")
|
eval_all.out = swapExt(finalMergedVCF,".vcf",".perm.csv")
|
||||||
eval_all.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
//eval_all.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
||||||
|
|
||||||
add(eval_all)
|
add(eval_all)
|
||||||
|
|
||||||
|
|
@ -70,7 +70,7 @@ class private_mutations extends QScript {
|
||||||
eval_afr.rodBind :+= new RodBind("compEUR","VCF",extract_eur.outputVCF)
|
eval_afr.rodBind :+= new RodBind("compEUR","VCF",extract_eur.outputVCF)
|
||||||
eval_afr.E :+= "ACTransitionTable"
|
eval_afr.E :+= "ACTransitionTable"
|
||||||
eval_afr.out = swapExt(extract_afr.outputVCF,".vcf",".perm.csv")
|
eval_afr.out = swapExt(extract_afr.outputVCF,".vcf",".perm.csv")
|
||||||
eval_afr.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
//eval_afr.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
||||||
eval_afr.noStandard = true
|
eval_afr.noStandard = true
|
||||||
|
|
||||||
add(eval_afr)
|
add(eval_afr)
|
||||||
|
|
@ -80,7 +80,7 @@ class private_mutations extends QScript {
|
||||||
eval_eur.rodBind :+= new RodBind("evalEUR","VCF",extract_eur.outputVCF)
|
eval_eur.rodBind :+= new RodBind("evalEUR","VCF",extract_eur.outputVCF)
|
||||||
eval_eur.E :+= "ACTransitionTable"
|
eval_eur.E :+= "ACTransitionTable"
|
||||||
eval_eur.out = swapExt(extract_eur.outputVCF,".vcf",".perm.csv")
|
eval_eur.out = swapExt(extract_eur.outputVCF,".vcf",".perm.csv")
|
||||||
eval_eur.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
//eval_eur.reportType = org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV
|
||||||
eval_eur.noStandard = true
|
eval_eur.noStandard = true
|
||||||
|
|
||||||
add(eval_eur)
|
add(eval_eur)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.queue.function.scattergather.{GatherFunction, Cl
|
||||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
||||||
|
|
||||||
class Phase1Calling extends QScript {
|
class Phase1Calling extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,6 @@ import org.broadinstitute.sting.queue.extensions.samtools._
|
||||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
||||||
|
|
||||||
class ASHGcalling extends QScript {
|
class ASHGcalling extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.queue.extensions.samtools._
|
||||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
||||||
|
|
||||||
class Phase1Calling extends QScript {
|
class Phase1Calling extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.queue.extensions.samtools._
|
||||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
||||||
|
|
||||||
class Phase1Cleaning extends QScript {
|
class Phase1Cleaning extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.queue.extensions.samtools._
|
||||||
import org.broadinstitute.sting.queue.{QException, QScript}
|
import org.broadinstitute.sting.queue.{QException, QScript}
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
||||||
import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType
|
|
||||||
|
|
||||||
class Phase1ProjectConsensus extends QScript {
|
class Phase1ProjectConsensus extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue