Improvements to indel analysis capabilities of VariantEval
-- Now calculates the number of Indels overlapping gold standard sites, as well as the percent of indels overlapping gold standard sites
-- Removed insertion : deletion ratio for 1 bp event, replaced it with 1 + 2 : 3 bp ratio for insertions and deletions separately. This is based on an old email from Mark Daly:
// - Since 1 & 2 bp insertions and 1 & 2 bp deletions are equally likely to cause a
// downstream frameshift, if we make the simplifying assumptions that 3 bp ins
// and 3bp del (adding/subtracting 1 AA in general) are roughly comparably
// selected against, we should see a consistent 1+2 : 3 bp ratio for insertions
// as for deletions, and certainly would expect consistency between in/dels that
// multiple methods find and in/dels that are unique to one method (since deletions
// are more common and the artifacts differ, it is probably worth looking at the totals,
// overlaps and ratios for insertions and deletions separately in the methods
// comparison and in this case don't even need to make the simplifying in = del functional assumption
-- Added a new VEW argument to bind a gold standard track
-- Added two new stratifications: OneBPIndel and TandemRepeat which do exactly what you imagine they do
-- Deleted random unused functions in IndelUtils
This commit is contained in:
parent
52ef4a3e26
commit
45fc0ea98d
|
|
@ -116,6 +116,15 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Some analyses want to count overlap not with dbSNP (which is in general very open) but
|
||||||
|
* actually want to itemize their overlap specifically with a set of gold standard sites
|
||||||
|
* such as HapMap, OMNI, or the gold standard indels. Theis argument provides a mechanism
|
||||||
|
* for communicating which file to use
|
||||||
|
*/
|
||||||
|
@Input(fullName="goldStandard", shortName = "gold", doc="Evaluations that count calls at sites of true variation (e.g., indel calls) will use this argument as their gold standard for comparison", required=false)
|
||||||
|
public RodBinding<VariantContext> goldStandard = null;
|
||||||
|
|
||||||
// Help arguments
|
// Help arguments
|
||||||
@Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit", required=false)
|
@Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit", required=false)
|
||||||
protected Boolean LIST = false;
|
protected Boolean LIST = false;
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,12 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
@DataPoint(description = "Number of singleton Indels", format = "%d")
|
@DataPoint(description = "Number of singleton Indels", format = "%d")
|
||||||
public int n_singleton_indels = 0;
|
public int n_singleton_indels = 0;
|
||||||
|
|
||||||
|
@DataPoint(description = "Number of Indels overlapping gold standard sites", format = "%d")
|
||||||
|
public int n_indels_matching_gold_standard = 0;
|
||||||
|
|
||||||
|
@DataPoint(description = "Percent of indels overlapping gold standard sites")
|
||||||
|
public String gold_standard_matching_rate;
|
||||||
|
|
||||||
// counts 1 for each site where the number of alleles > 2
|
// counts 1 for each site where the number of alleles > 2
|
||||||
public int nMultiIndelSites = 0;
|
public int nMultiIndelSites = 0;
|
||||||
|
|
||||||
|
|
@ -71,18 +77,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
@DataPoint(description = "Indel novelty rate")
|
@DataPoint(description = "Indel novelty rate")
|
||||||
public String indel_novelty_rate;
|
public String indel_novelty_rate;
|
||||||
|
|
||||||
@DataPoint(description = "1 to 2 bp indel ratio")
|
|
||||||
public String ratio_of_1_to_2_bp_indels;
|
|
||||||
|
|
||||||
@DataPoint(description = "1 to 3 bp indel ratio")
|
|
||||||
public String ratio_of_1_to_3_bp_indels;
|
|
||||||
|
|
||||||
@DataPoint(description = "2 to 3 bp indel ratio")
|
|
||||||
public String ratio_of_2_to_3_bp_indels;
|
|
||||||
|
|
||||||
@DataPoint(description = "1 and 2 to 3 bp indel ratio")
|
|
||||||
public String ratio_of_1_and_2_to_3_bp_indels;
|
|
||||||
|
|
||||||
@DataPoint(description = "Frameshift percent")
|
@DataPoint(description = "Frameshift percent")
|
||||||
public String frameshift_rate_for_coding_indels;
|
public String frameshift_rate_for_coding_indels;
|
||||||
|
|
||||||
|
|
@ -92,9 +86,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
@DataPoint(description = "Insertion to deletion ratio")
|
@DataPoint(description = "Insertion to deletion ratio")
|
||||||
public String insertion_to_deletion_ratio;
|
public String insertion_to_deletion_ratio;
|
||||||
|
|
||||||
@DataPoint(description = "Insertion to deletion ratio for 1 bp events")
|
|
||||||
public String insertion_to_deletion_ratio_for_1bp_indels;
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Frameshifts
|
// Frameshifts
|
||||||
//
|
//
|
||||||
|
|
@ -116,9 +107,25 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
int nSNPHets = 0, nSNPHoms = 0, nIndelHets = 0, nIndelHoms = 0;
|
int nSNPHets = 0, nSNPHoms = 0, nIndelHets = 0, nIndelHoms = 0;
|
||||||
|
|
||||||
int nKnownIndels = 0, nInsertions = 0;
|
int nKnownIndels = 0, nInsertions = 0;
|
||||||
int n1bpInsertions = 0, n1bpDeletions = 0;
|
|
||||||
int[] countByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
|
|
||||||
|
|
||||||
|
int[] insertionCountByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
|
||||||
|
int[] deletionCountByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
|
||||||
|
|
||||||
|
// - Since 1 & 2 bp insertions and 1 & 2 bp deletions are equally likely to cause a
|
||||||
|
// downstream frameshift, if we make the simplifying assumptions that 3 bp ins
|
||||||
|
// and 3bp del (adding/subtracting 1 AA in general) are roughly comparably
|
||||||
|
// selected against, we should see a consistent 1+2 : 3 bp ratio for insertions
|
||||||
|
// as for deletions, and certainly would expect consistency between in/dels that
|
||||||
|
// multiple methods find and in/dels that are unique to one method (since deletions
|
||||||
|
// are more common and the artifacts differ, it is probably worth looking at the totals,
|
||||||
|
// overlaps and ratios for insertions and deletions separately in the methods
|
||||||
|
// comparison and in this case don't even need to make the simplifying in = del functional assumption
|
||||||
|
|
||||||
|
@DataPoint(description = "ratio of 1 and 2 bp insertions to 3 bp insertions")
|
||||||
|
public String ratio_of_1_and_2_to_3_bp_insertions;
|
||||||
|
|
||||||
|
@DataPoint(description = "ratio of 1 and 2 bp deletions to 3 bp deletions")
|
||||||
|
public String ratio_of_1_and_2_to_3_bp_deletions;
|
||||||
|
|
||||||
public final static int LARGE_INDEL_SIZE_THRESHOLD = 10;
|
public final static int LARGE_INDEL_SIZE_THRESHOLD = 10;
|
||||||
|
|
||||||
|
|
@ -150,11 +157,11 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case INDEL:
|
case INDEL:
|
||||||
|
final VariantContext gold = getWalker().goldStandard == null ? null : tracker.getFirstValue(getWalker().goldStandard);
|
||||||
if ( eval.isComplexIndel() ) break; // don't count complex substitutions
|
if ( eval.isComplexIndel() ) break; // don't count complex substitutions
|
||||||
|
|
||||||
nIndelSites++;
|
nIndelSites++;
|
||||||
if ( ! eval.isBiallelic() ) nMultiIndelSites++;
|
if ( ! eval.isBiallelic() ) nMultiIndelSites++;
|
||||||
if ( variantWasSingleton(eval) ) n_singleton_indels++;
|
|
||||||
|
|
||||||
// collect information about het / hom ratio
|
// collect information about het / hom ratio
|
||||||
for ( final Genotype g : eval.getGenotypes() ) {
|
for ( final Genotype g : eval.getGenotypes() ) {
|
||||||
|
|
@ -164,15 +171,14 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
|
|
||||||
for ( Allele alt : eval.getAlternateAlleles() ) {
|
for ( Allele alt : eval.getAlternateAlleles() ) {
|
||||||
n_indels++; // +1 for each alt allele
|
n_indels++; // +1 for each alt allele
|
||||||
|
if ( variantWasSingleton(eval) ) n_singleton_indels++;
|
||||||
if ( comp != null ) nKnownIndels++; // TODO -- make this test allele specific?
|
if ( comp != null ) nKnownIndels++; // TODO -- make this test allele specific?
|
||||||
|
if ( gold != null ) n_indels_matching_gold_standard++;
|
||||||
|
|
||||||
// ins : del ratios
|
// ins : del ratios
|
||||||
final int alleleSize = alt.length() - eval.getReference().length();
|
final int alleleSize = alt.length() - eval.getReference().length();
|
||||||
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
|
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
|
||||||
if ( alleleSize > 0 ) nInsertions++;
|
if ( alleleSize > 0 ) nInsertions++;
|
||||||
if ( alleleSize == 1 ) n1bpInsertions++;
|
|
||||||
if ( alleleSize == -1 ) n1bpDeletions++;
|
|
||||||
|
|
||||||
// requires snpEFF annotations
|
// requires snpEFF annotations
|
||||||
if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) {
|
if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) {
|
||||||
|
|
@ -193,6 +199,7 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
n_large_deletions++;
|
n_large_deletions++;
|
||||||
|
|
||||||
// update the baby histogram
|
// update the baby histogram
|
||||||
|
final int[] countByLength = alleleSize < 0 ? deletionCountByLength : insertionCountByLength;
|
||||||
final int absSize = Math.abs(alleleSize);
|
final int absSize = Math.abs(alleleSize);
|
||||||
if ( absSize < countByLength.length ) countByLength[absSize]++;
|
if ( absSize < countByLength.length ) countByLength[absSize]++;
|
||||||
|
|
||||||
|
|
@ -210,18 +217,18 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
||||||
percent_of_sites_with_more_than_2_alleles = Utils.formattedRatio(nMultiIndelSites, nIndelSites);
|
percent_of_sites_with_more_than_2_alleles = Utils.formattedRatio(nMultiIndelSites, nIndelSites);
|
||||||
SNP_to_indel_ratio = Utils.formattedRatio(n_SNPs, n_indels);
|
SNP_to_indel_ratio = Utils.formattedRatio(n_SNPs, n_indels);
|
||||||
SNP_to_indel_ratio_for_singletons = Utils.formattedRatio(n_singleton_SNPs, n_singleton_indels);
|
SNP_to_indel_ratio_for_singletons = Utils.formattedRatio(n_singleton_SNPs, n_singleton_indels);
|
||||||
|
|
||||||
|
gold_standard_matching_rate = Utils.formattedNoveltyRate(n_indels_matching_gold_standard, n_indels);
|
||||||
indel_novelty_rate = Utils.formattedNoveltyRate(nKnownIndels, n_indels);
|
indel_novelty_rate = Utils.formattedNoveltyRate(nKnownIndels, n_indels);
|
||||||
ratio_of_1_to_2_bp_indels = Utils.formattedRatio(countByLength[1], countByLength[2]);
|
|
||||||
ratio_of_1_to_3_bp_indels = Utils.formattedRatio(countByLength[1], countByLength[3]);
|
|
||||||
ratio_of_2_to_3_bp_indels = Utils.formattedRatio(countByLength[2], countByLength[3]);
|
|
||||||
ratio_of_1_and_2_to_3_bp_indels = Utils.formattedRatio(countByLength[1] + countByLength[2], countByLength[3]);
|
|
||||||
frameshift_rate_for_coding_indels = Utils.formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting);
|
frameshift_rate_for_coding_indels = Utils.formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting);
|
||||||
|
|
||||||
|
ratio_of_1_and_2_to_3_bp_deletions = Utils.formattedRatio(deletionCountByLength[1] + deletionCountByLength[2], deletionCountByLength[3]);
|
||||||
|
ratio_of_1_and_2_to_3_bp_insertions = Utils.formattedRatio(insertionCountByLength[1] + insertionCountByLength[2], insertionCountByLength[3]);
|
||||||
|
|
||||||
SNP_het_to_hom_ratio = Utils.formattedRatio(nSNPHets, nSNPHoms);
|
SNP_het_to_hom_ratio = Utils.formattedRatio(nSNPHets, nSNPHoms);
|
||||||
indel_het_to_hom_ratio = Utils.formattedRatio(nIndelHets, nIndelHoms);
|
indel_het_to_hom_ratio = Utils.formattedRatio(nIndelHets, nIndelHoms);
|
||||||
|
|
||||||
insertion_to_deletion_ratio = Utils.formattedRatio(nInsertions, n_indels - nInsertions);
|
insertion_to_deletion_ratio = Utils.formattedRatio(nInsertions, n_indels - nInsertions);
|
||||||
insertion_to_deletion_ratio_for_1bp_indels = Utils.formattedRatio(n1bpInsertions, n1bpDeletions);
|
|
||||||
insertion_to_deletion_ratio_for_large_indels = Utils.formattedRatio(n_large_insertions, n_large_deletions);
|
insertion_to_deletion_ratio_for_large_indels = Utils.formattedRatio(n_large_insertions, n_large_deletions);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stratifies the eval RODs into sites where the indel is 1 bp in length and those where the event is 2+.
|
||||||
|
* all non indel events go into all bins, so that SNP counts can be used as contrasts in eval modules.
|
||||||
|
*/
|
||||||
|
public class OneBPIndel extends VariantStratifier {
|
||||||
|
private final static List<Object> ALL = Arrays.asList((Object)"all", (Object)"one.bp", (Object)"two.plus.bp");
|
||||||
|
private final static List<Object> ONE_BP = Arrays.asList((Object)"all", (Object)"one.bp");
|
||||||
|
private final static List<Object> TWO_PLUS_BP = Arrays.asList((Object)"all", (Object)"two.plus.bp");
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
states.addAll(ALL);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||||
|
if (eval != null && eval.isIndel()) {
|
||||||
|
for ( int l : eval.getIndelLengths() )
|
||||||
|
if ( l > 1 )
|
||||||
|
return TWO_PLUS_BP; // someone is too long
|
||||||
|
return ONE_BP; // all lengths are one
|
||||||
|
} else
|
||||||
|
return ALL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stratifies the eval RODs into sites that are tandem repeats
|
||||||
|
*/
|
||||||
|
public class TandemRepeat extends VariantStratifier {
|
||||||
|
private final static List<Object> JUST_ALL = Arrays.asList((Object)"all");
|
||||||
|
private final static List<Object> ALL = Arrays.asList((Object)"all", (Object)"is.repeat", (Object)"not.repeat");
|
||||||
|
private final static List<Object> REPEAT = Arrays.asList((Object)"all", (Object)"is.repeat");
|
||||||
|
private final static List<Object> NOT_REPEAT = Arrays.asList((Object)"all", (Object)"not.repeat");
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
states.addAll(ALL);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||||
|
if ( eval == null || ! eval.isIndel() )
|
||||||
|
return ALL;
|
||||||
|
else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
|
||||||
|
print("REPEAT", eval, ref);
|
||||||
|
return REPEAT;
|
||||||
|
} else {
|
||||||
|
print("NOT A REPEAT", eval, ref);
|
||||||
|
return NOT_REPEAT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void print(String prefix, VariantContext eval, ReferenceContext ref) {
|
||||||
|
// String alleles = ParsingUtils.sortList(eval.getAlleles()).toString();
|
||||||
|
// this.getVariantEvalWalker().getLogger().info(prefix + ": " + "pos=" + eval.getStart() + " alleles=" + alleles + " ref=" + new String(ref.getForwardBases()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -224,10 +224,6 @@ public class IndelUtils {
|
||||||
return inds;
|
return inds;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String[] getIndelClassificationNames() {
|
|
||||||
return COLUMN_KEYS;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getIndelClassificationName(int k) {
|
public static String getIndelClassificationName(int k) {
|
||||||
if (k >=0 && k < COLUMN_KEYS.length)
|
if (k >=0 && k < COLUMN_KEYS.length)
|
||||||
return COLUMN_KEYS[k];
|
return COLUMN_KEYS[k];
|
||||||
|
|
@ -235,35 +231,6 @@ public class IndelUtils {
|
||||||
throw new ReviewedStingException("Invalid index when trying to get indel classification name");
|
throw new ReviewedStingException("Invalid index when trying to get indel classification name");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isATExpansion(VariantContext vc, ReferenceContext ref) {
|
|
||||||
ArrayList<Integer> inds = findEventClassificationIndex(vc, ref);
|
|
||||||
|
|
||||||
boolean isIt = false;
|
|
||||||
for (int k : inds) {
|
|
||||||
if (k == IND_FOR_REPEAT_EXPANSION_A || k == IND_FOR_REPEAT_EXPANSION_T) {
|
|
||||||
isIt = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return isIt;
|
|
||||||
|
|
||||||
}
|
|
||||||
public static boolean isCGExpansion(VariantContext vc, ReferenceContext ref) {
|
|
||||||
ArrayList<Integer> inds = findEventClassificationIndex(vc, ref);
|
|
||||||
|
|
||||||
boolean isIt = false;
|
|
||||||
for (int k : inds) {
|
|
||||||
if (k == IND_FOR_REPEAT_EXPANSION_C || k == IND_FOR_REPEAT_EXPANSION_G) {
|
|
||||||
isIt = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return isIt;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isInsideExtendedIndel(VariantContext vc, ReferenceContext ref) {
|
public static boolean isInsideExtendedIndel(VariantContext vc, ReferenceContext ref) {
|
||||||
return (vc.getStart() != ref.getLocus().getStart());
|
return (vc.getStart() != ref.getLocus().getStart());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -302,7 +302,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
||||||
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||||
1, Arrays.asList("c8a782f51e094dc7be06dbfb795feab2"));
|
1, Arrays.asList("4c00cfa0fd343fef62d19af0edeb4f65"));
|
||||||
executeTestParallel("testSelect1", spec);
|
executeTestParallel("testSelect1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -330,7 +330,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testCompVsEvalAC() {
|
public void testCompVsEvalAC() {
|
||||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5c409a2ab4517f862c6678902c0fd7a1"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("4df6654860ad63b7e24e6bc5fbbbcb00"));
|
||||||
executeTestParallel("testCompVsEvalAC",spec);
|
executeTestParallel("testCompVsEvalAC",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -360,7 +360,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" --dbsnp " + b37dbSNP132 +
|
" --dbsnp " + b37dbSNP132 +
|
||||||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||||
" -noST -ST Novelty -o %s";
|
" -noST -ST Novelty -o %s";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("a27c700eabe6b7b3877c8fd4eabb3975"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("3b85cd0fa37539ff51d34e026f26fef2"));
|
||||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -372,7 +372,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||||
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||||
" -noST -ST Novelty -o %s";
|
" -noST -ST Novelty -o %s";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("3272a2db627d4f42bc512df49a8ea64b"));
|
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("bed8751c773b9568218f78c90f13348a"));
|
||||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -488,11 +488,32 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
"-o %s"
|
"-o %s"
|
||||||
),
|
),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("41a37636868a838a632559949c5216cf")
|
Arrays.asList("9726c0c8f19d271cf680f5f16f0926b3")
|
||||||
);
|
);
|
||||||
executeTest("testModernVCFWithLargeIndels", spec);
|
executeTest("testModernVCFWithLargeIndels", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStandardIndelEval() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T VariantEval",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
|
||||||
|
"-L 20",
|
||||||
|
"-noST -ST Sample -ST OneBPIndel -ST TandemRepeat",
|
||||||
|
"-noEV -EV IndelSummary -EV IndelLengthHistogram",
|
||||||
|
"-gold " + validationDataLocation + "/Mills_and_1000G_gold_standard.indels.b37.sites.vcf",
|
||||||
|
"-D " + b37dbSNP132,
|
||||||
|
"-o %s"
|
||||||
|
),
|
||||||
|
1,
|
||||||
|
Arrays.asList("c89705147ef4233d5de3a539469bd1d1")
|
||||||
|
);
|
||||||
|
executeTest("testStandardIndelEval", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test()
|
@Test()
|
||||||
public void testIncompatibleEvalAndStrat() {
|
public void testIncompatibleEvalAndStrat() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue