IndelSummary now emits all of the underlying counts for the ratios, percentages, etc. that it computes
This commit is contained in:
parent
542a8e3306
commit
23ccf772d4
|
|
@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -41,51 +40,81 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||
final protected static Logger logger = Logger.getLogger(IndelSummary.class);
|
||||
|
||||
//
|
||||
// counts of snps and indels
|
||||
//
|
||||
@DataPoint(description = "Number of SNPs", format = "%d")
|
||||
public int n_SNPs = 0;
|
||||
|
||||
@DataPoint(description = "Number of singleton SNPs", format = "%d")
|
||||
public int n_singleton_SNPs = 0;
|
||||
|
||||
@DataPoint(description = "Number of Indels", format = "%d")
|
||||
@DataPoint(description = "Number of indels", format = "%d")
|
||||
public int n_indels = 0;
|
||||
|
||||
// Number of Indels Sites (counts one for any number of alleles at site)
|
||||
public int nIndelSites = 0;
|
||||
|
||||
@DataPoint(description = "Number of singleton Indels", format = "%d")
|
||||
@DataPoint(description = "Number of singleton indels", format = "%d")
|
||||
public int n_singleton_indels = 0;
|
||||
|
||||
//
|
||||
// gold standard
|
||||
//
|
||||
@DataPoint(description = "Number of Indels overlapping gold standard sites", format = "%d")
|
||||
public int n_indels_matching_gold_standard = 0;
|
||||
|
||||
@DataPoint(description = "Percent of indels overlapping gold standard sites")
|
||||
public String gold_standard_matching_rate;
|
||||
|
||||
//
|
||||
// multi-allelics
|
||||
//
|
||||
// Number of Indels Sites (counts one for any number of alleles at site)
|
||||
public int nIndelSites = 0;
|
||||
|
||||
@DataPoint(description = "Number of sites with where the number of alleles is greater than 2")
|
||||
public int n_multiallelic_indel_sites = 0;
|
||||
|
||||
@DataPoint(description = "Percent of indel sites that are multi-allelic")
|
||||
public String percent_of_sites_with_more_than_2_alleles;
|
||||
|
||||
//
|
||||
// snp : indel ratios
|
||||
//
|
||||
@DataPoint(description = "SNP to indel ratio")
|
||||
public String SNP_to_indel_ratio;
|
||||
|
||||
@DataPoint(description = "Singleton SNP to indel ratio")
|
||||
public String SNP_to_indel_ratio_for_singletons;
|
||||
|
||||
//
|
||||
// novelty
|
||||
//
|
||||
@DataPoint(description = "Number of novel indels", format = "%d")
|
||||
public int n_novel_indels = 0;
|
||||
|
||||
@DataPoint(description = "Indel novelty rate")
|
||||
public String indel_novelty_rate;
|
||||
|
||||
@DataPoint(description = "Frameshift percent")
|
||||
public String frameshift_rate_for_coding_indels;
|
||||
|
||||
//
|
||||
// insertions to deletions
|
||||
//
|
||||
@DataPoint(description = "Number of insertion indels")
|
||||
public int n_insertions = 0;
|
||||
|
||||
@DataPoint(description = "Number of deletion indels")
|
||||
public int n_deletions = 0;
|
||||
|
||||
@DataPoint(description = "Insertion to deletion ratio")
|
||||
public String insertion_to_deletion_ratio;
|
||||
|
||||
@DataPoint(description = "Number of large (>10 bp) deletions")
|
||||
public int n_large_deletions = 0;
|
||||
|
||||
@DataPoint(description = "Number of large (>10 bp) insertions")
|
||||
public int n_large_insertions = 0;
|
||||
|
||||
@DataPoint(description = "Ratio of large (>10 bp) insertions to deletions")
|
||||
public String insertion_to_deletion_ratio_for_large_indels;
|
||||
|
||||
//
|
||||
// Frameshifts
|
||||
//
|
||||
|
|
@ -95,6 +124,9 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
@DataPoint(description = "Number of indels in protein-coding regions not labeled as frameshift")
|
||||
public int n_coding_indels_in_frame = 0;
|
||||
|
||||
@DataPoint(description = "Frameshift percent")
|
||||
public String frameshift_rate_for_coding_indels;
|
||||
|
||||
//
|
||||
// Het : hom ratios
|
||||
//
|
||||
|
|
@ -106,8 +138,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
int nSNPHets = 0, nSNPHoms = 0, nIndelHets = 0, nIndelHoms = 0;
|
||||
|
||||
int nKnownIndels = 0, nInsertions = 0;
|
||||
|
||||
int[] insertionCountByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
|
||||
int[] deletionCountByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
|
||||
|
||||
|
|
@ -129,15 +159,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
public final static int LARGE_INDEL_SIZE_THRESHOLD = 10;
|
||||
|
||||
@DataPoint(description = "Number of large (>10 bp) deletions")
|
||||
public int n_large_deletions = 0;
|
||||
|
||||
@DataPoint(description = "Number of large (>10 bp) insertions")
|
||||
public int n_large_insertions = 0;
|
||||
|
||||
@DataPoint(description = "Ratio of large (>10 bp) insertions to deletions")
|
||||
public String insertion_to_deletion_ratio_for_large_indels;
|
||||
|
||||
@Override public int getComparisonOrder() { return 2; }
|
||||
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
@ -171,13 +192,14 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
for ( Allele alt : eval.getAlternateAlleles() ) {
|
||||
n_indels++; // +1 for each alt allele
|
||||
if ( variantWasSingleton(eval) ) n_singleton_indels++;
|
||||
if ( comp != null ) nKnownIndels++; // TODO -- make this test allele specific?
|
||||
if ( comp == null ) n_novel_indels++; // TODO -- make this test allele specific?
|
||||
if ( gold != null ) n_indels_matching_gold_standard++;
|
||||
|
||||
// ins : del ratios
|
||||
final int alleleSize = alt.length() - eval.getReference().length();
|
||||
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
|
||||
if ( alleleSize > 0 ) nInsertions++;
|
||||
if ( alleleSize > 0 ) n_insertions++;
|
||||
if ( alleleSize < 0 ) n_deletions++;
|
||||
|
||||
// requires snpEFF annotations
|
||||
if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) {
|
||||
|
|
@ -220,7 +242,7 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
SNP_to_indel_ratio_for_singletons = Utils.formattedRatio(n_singleton_SNPs, n_singleton_indels);
|
||||
|
||||
gold_standard_matching_rate = Utils.formattedPercent(n_indels_matching_gold_standard, n_indels);
|
||||
indel_novelty_rate = Utils.formattedNoveltyRate(nKnownIndels, n_indels);
|
||||
indel_novelty_rate = Utils.formattedNoveltyRate(n_indels - n_novel_indels, n_indels);
|
||||
frameshift_rate_for_coding_indels = Utils.formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting);
|
||||
|
||||
ratio_of_1_and_2_to_3_bp_deletions = Utils.formattedRatio(deletionCountByLength[1] + deletionCountByLength[2], deletionCountByLength[3]);
|
||||
|
|
@ -229,7 +251,7 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
SNP_het_to_hom_ratio = Utils.formattedRatio(nSNPHets, nSNPHoms);
|
||||
indel_het_to_hom_ratio = Utils.formattedRatio(nIndelHets, nIndelHoms);
|
||||
|
||||
insertion_to_deletion_ratio = Utils.formattedRatio(nInsertions, n_indels - nInsertions);
|
||||
insertion_to_deletion_ratio = Utils.formattedRatio(n_insertions, n_deletions);
|
||||
insertion_to_deletion_ratio_for_large_indels = Utils.formattedRatio(n_large_insertions, n_large_deletions);
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue