Adding pairwise AF table. Not polished at all, but usable none-the-less.

This commit is contained in:
Eric Banks 2012-01-26 00:37:06 -05:00
parent 774e540042
commit c5e81be978
1 changed files with 95 additions and 43 deletions

View File

@ -31,10 +31,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*; import java.util.*;
@ -90,7 +90,59 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
@DataPoint(description = "Multi-allelic Indel Novelty Rate") @DataPoint(description = "Multi-allelic Indel Novelty Rate")
public String indelNoveltyRate = "NA"; public String indelNoveltyRate = "NA";
// TODO -- Also, AF distributions (pairwise like TiTv) @DataPoint(description="Histogram of allele frequencies")
AFHistogram AFhistogram = new AFHistogram();
/*
* AF histogram table object
*/
static class AFHistogram implements TableType {
private Object[] colKeys, rowKeys = {"pairwise_AF"};
private int[] AFhistogram;
private static final double AFincrement = 0.01;
private static final int numBins = (int)(1.00 / AFincrement);
public AFHistogram() {
colKeys = initColKeys();
AFhistogram = new int[colKeys.length];
}
public Object[] getColumnKeys() {
return colKeys;
}
public Object[] getRowKeys() {
return rowKeys;
}
public Object getCell(int row, int col) {
return AFhistogram[col];
}
private static Object[] initColKeys() {
ArrayList<String> keyList = new ArrayList<String>(numBins + 1);
for ( double a = 0.00; a <= 1.01; a += AFincrement ) {
keyList.add(String.format("%.2f", a));
}
return keyList.toArray();
}
public String getName() { return "AFHistTable"; }
public void update(VariantContext vc) {
final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
if ( obj == null || !(obj instanceof List) )
return;
List<String> list = (List<String>)obj;
for ( String str : list ) {
final double AF = Double.valueOf(str);
final int bin = (int)(numBins * MathUtils.round(AF, 2));
AFhistogram[bin]++;
}
}
}
public void initialize(VariantEvalWalker walker) {} public void initialize(VariantEvalWalker walker) {}
@ -104,58 +156,58 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
} }
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphicInSamples() ) if ( eval == null || eval.isMonomorphicInSamples() )
return null; return null;
// update counts // update counts
switch ( eval.getType() ) { switch ( eval.getType() ) {
case SNP: case SNP:
nSNPs++; nSNPs++;
if ( !eval.isBiallelic() ) { if ( !eval.isBiallelic() ) {
nMultiSNPs++; nMultiSNPs++;
calculatePairwiseTiTv(eval); calculatePairwiseTiTv(eval);
calculateSNPPairwiseNovelty(eval, comp); calculateSNPPairwiseNovelty(eval, comp);
} }
break; break;
case INDEL: case INDEL:
nIndels++; nIndels++;
if ( !eval.isBiallelic() ) { if ( !eval.isBiallelic() ) {
nMultiIndels++; nMultiIndels++;
calculateIndelPairwiseNovelty(eval, comp); calculateIndelPairwiseNovelty(eval, comp);
} }
break; break;
default: default:
throw new UserException.BadInput("Unexpected variant context type: " + eval); throw new UserException.BadInput("Unexpected variant context type: " + eval);
} }
AFhistogram.update(eval);
return null; // we don't capture any interesting sites return null; // we don't capture any interesting sites
} }
private void calculatePairwiseTiTv(VariantContext vc) { private void calculatePairwiseTiTv(VariantContext vc) {
for ( Allele alt : vc.getAlternateAlleles() ) { for ( Allele alt : vc.getAlternateAlleles() ) {
if ( VariantContextUtils.isTransition(vc.getReference(), alt) ) if ( VariantContextUtils.isTransition(vc.getReference(), alt) )
nTi++; nTi++;
else else
nTv++; nTv++;
} }
} }
private void calculateSNPPairwiseNovelty(VariantContext eval, VariantContext comp) { private void calculateSNPPairwiseNovelty(VariantContext eval, VariantContext comp) {
if ( comp == null ) if ( comp == null )
return; return;
int knownAlleles = 0; int knownAlleles = 0;
for ( Allele alt : eval.getAlternateAlleles() ) { for ( Allele alt : eval.getAlternateAlleles() ) {
if ( comp.getAlternateAlleles().contains(alt) ) if ( comp.getAlternateAlleles().contains(alt) )
knownAlleles++; knownAlleles++;
} }
if ( knownAlleles == eval.getAlternateAlleles().size() ) if ( knownAlleles == eval.getAlternateAlleles().size() )
knownSNPsComplete++; knownSNPsComplete++;
else if ( knownAlleles > 0 ) else if ( knownAlleles > 0 )
knownSNPsPartial++; knownSNPsPartial++;
} }
private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) { private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
@ -168,14 +220,14 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
} }
public void finalizeEvaluation() { public void finalizeEvaluation() {
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci; processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs; variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci; processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels; variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels;
TiTvRatio = (double)nTi / (double)nTv; TiTvRatio = (double)nTi / (double)nTv;
SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete); SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete); indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete);
} }
} }