Adding pairwise AF table. Not polished at all, but usable nonetheless.
This commit is contained in:
parent
774e540042
commit
c5e81be978
|
|
@ -31,10 +31,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -90,7 +90,59 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
@DataPoint(description = "Multi-allelic Indel Novelty Rate")
|
||||
public String indelNoveltyRate = "NA";
|
||||
|
||||
// TODO -- Also, AF distributions (pairwise like TiTv)
|
||||
@DataPoint(description="Histogram of allele frequencies")
|
||||
AFHistogram AFhistogram = new AFHistogram();
|
||||
|
||||
/*
|
||||
* AF histogram table object
|
||||
*/
|
||||
static class AFHistogram implements TableType {
|
||||
private Object[] colKeys, rowKeys = {"pairwise_AF"};
|
||||
private int[] AFhistogram;
|
||||
|
||||
private static final double AFincrement = 0.01;
|
||||
private static final int numBins = (int)(1.00 / AFincrement);
|
||||
|
||||
public AFHistogram() {
|
||||
colKeys = initColKeys();
|
||||
AFhistogram = new int[colKeys.length];
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
return colKeys;
|
||||
}
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
return rowKeys;
|
||||
}
|
||||
|
||||
public Object getCell(int row, int col) {
|
||||
return AFhistogram[col];
|
||||
}
|
||||
|
||||
private static Object[] initColKeys() {
|
||||
ArrayList<String> keyList = new ArrayList<String>(numBins + 1);
|
||||
for ( double a = 0.00; a <= 1.01; a += AFincrement ) {
|
||||
keyList.add(String.format("%.2f", a));
|
||||
}
|
||||
return keyList.toArray();
|
||||
}
|
||||
|
||||
public String getName() { return "AFHistTable"; }
|
||||
|
||||
public void update(VariantContext vc) {
|
||||
final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
|
||||
if ( obj == null || !(obj instanceof List) )
|
||||
return;
|
||||
|
||||
List<String> list = (List<String>)obj;
|
||||
for ( String str : list ) {
|
||||
final double AF = Double.valueOf(str);
|
||||
final int bin = (int)(numBins * MathUtils.round(AF, 2));
|
||||
AFhistogram[bin]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No per-walker setup is required for this evaluator; counters are field-initialized.
public void initialize(VariantEvalWalker walker) {}
|
||||
|
||||
|
|
@ -104,58 +156,58 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||
}
|
||||
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( eval == null || eval.isMonomorphicInSamples() )
|
||||
return null;
|
||||
return null;
|
||||
|
||||
// update counts
|
||||
switch ( eval.getType() ) {
|
||||
case SNP:
|
||||
nSNPs++;
|
||||
if ( !eval.isBiallelic() ) {
|
||||
nMultiSNPs++;
|
||||
calculatePairwiseTiTv(eval);
|
||||
calculateSNPPairwiseNovelty(eval, comp);
|
||||
}
|
||||
break;
|
||||
case SNP:
|
||||
nSNPs++;
|
||||
if ( !eval.isBiallelic() ) {
|
||||
nMultiSNPs++;
|
||||
calculatePairwiseTiTv(eval);
|
||||
calculateSNPPairwiseNovelty(eval, comp);
|
||||
}
|
||||
break;
|
||||
case INDEL:
|
||||
nIndels++;
|
||||
if ( !eval.isBiallelic() ) {
|
||||
nMultiIndels++;
|
||||
calculateIndelPairwiseNovelty(eval, comp);
|
||||
}
|
||||
break;
|
||||
nIndels++;
|
||||
if ( !eval.isBiallelic() ) {
|
||||
nMultiIndels++;
|
||||
calculateIndelPairwiseNovelty(eval, comp);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new UserException.BadInput("Unexpected variant context type: " + eval);
|
||||
}
|
||||
|
||||
AFhistogram.update(eval);
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
private void calculatePairwiseTiTv(VariantContext vc) {
|
||||
for ( Allele alt : vc.getAlternateAlleles() ) {
|
||||
if ( VariantContextUtils.isTransition(vc.getReference(), alt) )
|
||||
nTi++;
|
||||
else
|
||||
nTv++;
|
||||
}
|
||||
for ( Allele alt : vc.getAlternateAlleles() ) {
|
||||
if ( VariantContextUtils.isTransition(vc.getReference(), alt) )
|
||||
nTi++;
|
||||
else
|
||||
nTv++;
|
||||
}
|
||||
}
|
||||
|
||||
private void calculateSNPPairwiseNovelty(VariantContext eval, VariantContext comp) {
|
||||
if ( comp == null )
|
||||
return;
|
||||
if ( comp == null )
|
||||
return;
|
||||
|
||||
int knownAlleles = 0;
|
||||
for ( Allele alt : eval.getAlternateAlleles() ) {
|
||||
if ( comp.getAlternateAlleles().contains(alt) )
|
||||
knownAlleles++;
|
||||
}
|
||||
int knownAlleles = 0;
|
||||
for ( Allele alt : eval.getAlternateAlleles() ) {
|
||||
if ( comp.getAlternateAlleles().contains(alt) )
|
||||
knownAlleles++;
|
||||
}
|
||||
|
||||
if ( knownAlleles == eval.getAlternateAlleles().size() )
|
||||
knownSNPsComplete++;
|
||||
else if ( knownAlleles > 0 )
|
||||
knownSNPsPartial++;
|
||||
if ( knownAlleles == eval.getAlternateAlleles().size() )
|
||||
knownSNPsComplete++;
|
||||
else if ( knownAlleles > 0 )
|
||||
knownSNPsPartial++;
|
||||
}
|
||||
|
||||
private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
|
||||
|
|
@ -168,14 +220,14 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
|
||||
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
|
||||
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
|
||||
variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels;
|
||||
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
|
||||
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
|
||||
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
|
||||
variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels;
|
||||
|
||||
TiTvRatio = (double)nTi / (double)nTv;
|
||||
|
||||
SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
|
||||
indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete);
|
||||
SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
|
||||
indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue