Splitting up the MultiallelicSummary module into the standard part for use by all and the dev piece used just by me

This commit is contained in:
Eric Banks 2012-03-13 16:31:51 -04:00
parent f76da1efd2
commit 77243d0df1
2 changed files with 9 additions and 187 deletions

View File

@ -40,57 +40,13 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*; import java.util.*;
@Analysis(description = "Evaluation summary for multi-allelic variants") @Analysis(description = "Evaluation summary for multi-allelic variants")
public class MultiallelicSummary extends VariantEvaluator { // implements StandardEval { public class MultiallelicAFs extends VariantEvaluator {
final protected static Logger logger = Logger.getLogger(MultiallelicSummary.class); final protected static Logger logger = Logger.getLogger(MultiallelicAFs.class);
public enum Type { public enum Type {
SNP, INDEL SNP, INDEL
} }
// basic counts on various rates found
@DataPoint(description = "Number of processed loci", format = "%d")
public long nProcessedLoci = 0;
@DataPoint(description = "Number of SNPs", format = "%d")
public int nSNPs = 0;
@DataPoint(description = "Number of multi-allelic SNPs", format = "%d")
public int nMultiSNPs = 0;
@DataPoint(description = "% processed sites that are multi-allelic SNPs", format = "%.5f")
public double processedMultiSnpRatio = 0;
@DataPoint(description = "% SNP sites that are multi-allelic", format = "%.3f")
public double variantMultiSnpRatio = 0;
@DataPoint(description = "Number of Indels", format = "%d")
public int nIndels = 0;
@DataPoint(description = "Number of multi-allelic Indels", format = "%d")
public int nMultiIndels = 0;
@DataPoint(description = "% processed sites that are multi-allelic Indels", format = "%.5f")
public double processedMultiIndelRatio = 0;
@DataPoint(description = "% Indel sites that are multi-allelic", format = "%.3f")
public double variantMultiIndelRatio = 0;
@DataPoint(description = "Number of Transitions", format = "%d")
public int nTi = 0;
@DataPoint(description = "Number of Transversions", format = "%d")
public int nTv = 0;
@DataPoint(description = "Overall TiTv ratio", format = "%.2f")
public double TiTvRatio = 0;
@DataPoint(description = "Multi-allelic SNPs partially known", format = "%d")
public int knownSNPsPartial = 0;
@DataPoint(description = "Multi-allelic SNPs completely known", format = "%d")
public int knownSNPsComplete = 0;
@DataPoint(description = "Multi-allelic SNP Novelty Rate")
public String SNPNoveltyRate = "NA";
//TODO -- implement me
//@DataPoint(description = "Multi-allelic Indels partially known", format = "%d")
public int knownIndelsPartial = 0;
//@DataPoint(description = "Multi-allelic Indels completely known", format = "%d")
public int knownIndelsComplete = 0;
//@DataPoint(description = "Multi-allelic Indel Novelty Rate")
public String indelNoveltyRate = "NA";
@DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele") @DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele")
AFHistogram AFhistogramMaxSnp = new AFHistogram(); AFHistogram AFhistogramMaxSnp = new AFHistogram();
@ -154,32 +110,22 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
return 2; return 2;
} }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {}
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphicInSamples() ) if ( eval == null || eval.isMonomorphicInSamples() )
return null; return null;
if ( !eval.isBiallelic() )
return null;
// update counts // update counts
switch ( eval.getType() ) { switch ( eval.getType() ) {
case SNP: case SNP:
nSNPs++;
if ( !eval.isBiallelic() ) {
nMultiSNPs++;
calculatePairwiseTiTv(eval);
calculateSNPPairwiseNovelty(eval, comp);
updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp); updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp);
}
break; break;
case INDEL: case INDEL:
nIndels++;
if ( !eval.isBiallelic() ) {
nMultiIndels++;
calculateIndelPairwiseNovelty(eval, comp);
updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel); updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel);
}
break; break;
default: default:
throw new UserException.BadInput("Unexpected variant context type: " + eval); throw new UserException.BadInput("Unexpected variant context type: " + eval);
@ -188,34 +134,6 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
return null; // we don't capture any interesting sites return null; // we don't capture any interesting sites
} }
private void calculatePairwiseTiTv(VariantContext vc) {
for ( Allele alt : vc.getAlternateAlleles() ) {
if ( VariantContextUtils.isTransition(vc.getReference(), alt) )
nTi++;
else
nTv++;
}
}
private void calculateSNPPairwiseNovelty(VariantContext eval, VariantContext comp) {
if ( comp == null )
return;
int knownAlleles = 0;
for ( Allele alt : eval.getAlternateAlleles() ) {
if ( comp.getAlternateAlleles().contains(alt) )
knownAlleles++;
}
if ( knownAlleles == eval.getAlternateAlleles().size() )
knownSNPsComplete++;
else if ( knownAlleles > 0 )
knownSNPsPartial++;
}
private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
}
private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) { private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) {
final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null); final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
@ -233,22 +151,4 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa
for ( int i = 0; i < AFs.size() - 1; i++ ) for ( int i = 0; i < AFs.size() - 1; i++ )
min.update(AFs.get(i)); min.update(AFs.get(i));
} }
private final String noveltyRate(final int all, final int known) {
final int novel = all - known;
final double rate = (novel / (1.0 * all));
return all == 0 ? "NA" : String.format("%.2f", rate);
}
public void finalizeEvaluation() {
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci;
variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels;
TiTvRatio = (double)nTi / (double)nTv;
SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete);
}
} }

View File

@ -31,14 +31,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
@Analysis(description = "Evaluation summary for multi-allelic variants") @Analysis(description = "Evaluation summary for multi-allelic variants")
public class MultiallelicSummary extends VariantEvaluator implements StandardEval { public class MultiallelicSummary extends VariantEvaluator implements StandardEval {
final protected static Logger logger = Logger.getLogger(MultiallelicSummary.class); final protected static Logger logger = Logger.getLogger(MultiallelicSummary.class);
@ -91,60 +86,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
//@DataPoint(description = "Multi-allelic Indel Novelty Rate") //@DataPoint(description = "Multi-allelic Indel Novelty Rate")
public String indelNoveltyRate = "NA"; public String indelNoveltyRate = "NA";
@DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele")
AFHistogram AFhistogramMaxSnp = new AFHistogram();
@DataPoint(description="Histogram of allele frequencies for less common SNP alternate alleles")
AFHistogram AFhistogramMinSnp = new AFHistogram();
@DataPoint(description="Histogram of allele frequencies for most common Indel alternate allele")
AFHistogram AFhistogramMaxIndel = new AFHistogram();
@DataPoint(description="Histogram of allele frequencies for less common Indel alternate alleles")
AFHistogram AFhistogramMinIndel = new AFHistogram();
/*
* AF histogram table object
*/
static class AFHistogram implements TableType {
private Object[] rowKeys, colKeys = {"count"};
private int[] AFhistogram;
private static final double AFincrement = 0.01;
private static final int numBins = (int)(1.00 / AFincrement);
public AFHistogram() {
rowKeys = initRowKeys();
AFhistogram = new int[rowKeys.length];
}
public Object[] getColumnKeys() {
return colKeys;
}
public Object[] getRowKeys() {
return rowKeys;
}
public Object getCell(int row, int col) {
return AFhistogram[row];
}
private static Object[] initRowKeys() {
ArrayList<String> keyList = new ArrayList<String>(numBins + 1);
for ( double a = 0.00; a <= 1.01; a += AFincrement ) {
keyList.add(String.format("%.2f", a));
}
return keyList.toArray();
}
public String getName() { return "AFHistTable"; }
public void update(final double AF) {
final int bin = (int)(numBins * MathUtils.round(AF, 2));
AFhistogram[bin]++;
}
}
public void initialize(VariantEvalWalker walker) {} public void initialize(VariantEvalWalker walker) {}
@ -170,7 +111,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
nMultiSNPs++; nMultiSNPs++;
calculatePairwiseTiTv(eval); calculatePairwiseTiTv(eval);
calculateSNPPairwiseNovelty(eval, comp); calculateSNPPairwiseNovelty(eval, comp);
updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp);
} }
break; break;
case INDEL: case INDEL:
@ -178,7 +118,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
if ( !eval.isBiallelic() ) { if ( !eval.isBiallelic() ) {
nMultiIndels++; nMultiIndels++;
calculateIndelPairwiseNovelty(eval, comp); calculateIndelPairwiseNovelty(eval, comp);
updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel);
} }
break; break;
default: default:
@ -214,24 +153,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
} }
private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) { private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) {
} // TODO -- implement me
private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) {
final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null);
if ( obj == null || !(obj instanceof List) )
return;
List<String> list = (List<String>)obj;
ArrayList<Double> AFs = new ArrayList<Double>(list.size());
for ( String str : list ) {
AFs.add(Double.valueOf(str));
}
Collections.sort(AFs);
max.update(AFs.get(AFs.size()-1));
for ( int i = 0; i < AFs.size() - 1; i++ )
min.update(AFs.get(i));
} }
private final String noveltyRate(final int all, final int known) { private final String noveltyRate(final int all, final int known) {