Changed the definition of degeneracy: it is defined at the site level (the degeneracy of a position within a codon), not as the degeneracy of the amino acid itself, as I initially thought. Added the ability to supply an ancestral allele track (available in /humgen/gsa-hpprojects/GATK/data/Ancestor/).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5963 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d784dac495
commit
49b021d435
|
|
@ -30,6 +30,13 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import net.sf.picard.reference.FastaSequenceFile;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.fasta.FastaSequence;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -95,6 +102,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
|
||||
private String TRANCHE_FILENAME = null;
|
||||
|
||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||
private File ancestralAlignmentsFile = null;
|
||||
|
||||
// Variables
|
||||
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
||||
private Set<String> compNames = new TreeSet<String>();
|
||||
|
|
@ -120,6 +130,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// Utility class
|
||||
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
|
||||
|
||||
// Ancestral alignments
|
||||
private IndexedFastaSequenceFile ancestralAlignments = null;
|
||||
|
||||
/**
|
||||
* Initialize the stratifications, evaluations, evaluation contexts, and reporting object
|
||||
*/
|
||||
|
|
@ -165,7 +178,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
|
||||
|
||||
// Initialize select expressions
|
||||
//jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));
|
||||
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
|
||||
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
|
||||
jexlExpressions.add(sjexl);
|
||||
|
|
@ -190,6 +202,15 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
|
||||
// Initialize report table
|
||||
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);
|
||||
|
||||
// Load ancestral alignments
|
||||
if (ancestralAlignmentsFile != null) {
|
||||
try {
|
||||
ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException(String.format("The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -204,6 +225,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
|
||||
if (tracker != null) {
|
||||
String aastr = (ancestralAlignments == null) ? null : new String(ancestralAlignments.getSubsequenceAt(ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()).getBases());
|
||||
|
||||
// track sample vc
|
||||
HashMap<String, HashMap<String, VariantContext>> vcs = variantEvalUtils.getVariantContexts(tracker, ref, compNames, evalNames, typesToUse != null);
|
||||
|
||||
|
|
@ -220,6 +243,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// if ( eval != null ) logger.info("Keeping " + eval);
|
||||
}
|
||||
|
||||
if (eval != null && aastr != null) {
|
||||
HashMap<String, Object> newAts = new HashMap<String, Object>(eval.getAttributes());
|
||||
newAts.put("ANCESTRALALLELE", aastr);
|
||||
|
||||
eval = VariantContext.modifyAttributes(eval, newAts);
|
||||
}
|
||||
|
||||
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
ArrayList<String> states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName);
|
||||
|
|
|
|||
|
|
@ -53,6 +53,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
public long nHomVar = 0;
|
||||
@DataPoint(description = "Number of singletons")
|
||||
public long nSingletons = 0;
|
||||
@DataPoint(description = "Number of derived homozygotes")
|
||||
public long nHomDerived = 0;
|
||||
|
||||
// calculations that get set in the finalizeEvaluation method
|
||||
@DataPoint(description = "heterozygosity per locus rate")
|
||||
|
|
@ -115,19 +117,47 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
|
||||
}
|
||||
|
||||
String refStr = vc1.getReference().getBaseString().toUpperCase();
|
||||
|
||||
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
|
||||
// if (aaStr.equals(".")) {
|
||||
// aaStr = refStr;
|
||||
// }
|
||||
|
||||
// ref aa alt class
|
||||
// A C A der homozygote
|
||||
// A C C anc homozygote
|
||||
|
||||
// A A A ref homozygote
|
||||
// A A C
|
||||
// A C A
|
||||
// A C C
|
||||
|
||||
for (Genotype g : vc1.getGenotypes().values()) {
|
||||
String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null;
|
||||
|
||||
switch (g.getType()) {
|
||||
case NO_CALL:
|
||||
nNoCalls++;
|
||||
break;
|
||||
case HOM_REF:
|
||||
nHomRef++;
|
||||
|
||||
if ( aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr) ) {
|
||||
nHomDerived++;
|
||||
}
|
||||
|
||||
break;
|
||||
case HET:
|
||||
nHets++;
|
||||
break;
|
||||
case HOM_VAR:
|
||||
nHomVar++;
|
||||
|
||||
if ( aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr) ) {
|
||||
nHomDerived++;
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
|
|
@ -48,27 +47,13 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class);
|
||||
|
||||
// a mapping from allele count to stats
|
||||
@DataPoint(description = "the frequency statistics for each allele")
|
||||
FrequencyStats alleleFreqStats = new FrequencyStats();
|
||||
|
||||
// a mapping from sample to stats
|
||||
@DataPoint(description = "the concordance statistics for each sample")
|
||||
SampleStats sampleStats = null;
|
||||
@DataPoint(description = "the detailed concordance statistics for each sample")
|
||||
SampleStats detailedStats = null;
|
||||
|
||||
// a mapping from sample to stats summary
|
||||
@DataPoint(description = "the concordance statistics summary for each sample")
|
||||
SampleSummaryStats sampleSummaryStats = null;
|
||||
|
||||
// two histograms of variant quality scores, for true positive and false positive calls
|
||||
@DataPoint(description = "the variant quality score histograms for true positive and false positive calls")
|
||||
QualityScoreHistograms qualityScoreHistograms = null;
|
||||
|
||||
@DataPoint(description = "the concordance statistics summary by allele count")
|
||||
ACSummaryStats alleleCountSummary = null;
|
||||
|
||||
@DataPoint(description = "the concordance statistics by allele count")
|
||||
ACStats alleleCountStats = null;
|
||||
@DataPoint(description = "the simplified concordance statistics for each sample")
|
||||
SampleSummaryStats simplifiedStats = null;
|
||||
|
||||
private static final int MAX_MISSED_VALIDATION_DATA = 100;
|
||||
|
||||
|
|
@ -253,27 +238,11 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
return interesting;
|
||||
}
|
||||
|
||||
if( qualityScoreHistograms == null ) {
|
||||
qualityScoreHistograms = new QualityScoreHistograms();
|
||||
}
|
||||
|
||||
if ( alleleCountStats == null && eval != null && validation != null && validation.getSampleNames().size() > 0) {
|
||||
alleleCountStats = new ACStats(eval,validation,Genotype.Type.values().length);
|
||||
alleleCountSummary = new ACSummaryStats(eval, validation);
|
||||
}
|
||||
|
||||
if ( alleleCountStats != null ) {
|
||||
// for ( int i = 0; i <= 2*validation.getGenotypes().size(); i++ ) {
|
||||
// concordanceStats.put(String.format("compAC%d",i), new long[nGenotypeTypes][nGenotypeTypes]);
|
||||
// rowKeys[1+2*evalvc.getGenotypes().size()+i] = String.format("compAC%d",i);
|
||||
// }
|
||||
}
|
||||
|
||||
if (sampleStats == null) {
|
||||
if (detailedStats == null) {
|
||||
if (eval != null) {
|
||||
// initialize the concordance table
|
||||
sampleStats = new SampleStats(eval,Genotype.Type.values().length);
|
||||
sampleSummaryStats = new SampleSummaryStats(eval);
|
||||
detailedStats = new SampleStats(eval,Genotype.Type.values().length);
|
||||
simplifiedStats = new SampleSummaryStats(eval);
|
||||
for (final VariantContext vc : missedValidationData) {
|
||||
determineStats(null, vc);
|
||||
}
|
||||
|
|
@ -323,11 +292,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
sampleStats.incrValue(sample, truth, called);
|
||||
if ( evalAC != null && validationAC != null) {
|
||||
alleleCountStats.incrValue(evalAC,truth,called);
|
||||
alleleCountStats.incrValue(validationAC,truth,called);
|
||||
}
|
||||
detailedStats.incrValue(sample, truth, called);
|
||||
}
|
||||
}
|
||||
// otherwise, mark no-calls for all samples
|
||||
|
|
@ -336,10 +301,8 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
for (final String sample : validation.getGenotypes().keySet()) {
|
||||
final Genotype.Type truth = validation.getGenotype(sample).getType();
|
||||
sampleStats.incrValue(sample, truth, called);
|
||||
if ( evalAC != null ) {
|
||||
alleleCountStats.incrValue(evalAC,truth,called);
|
||||
}
|
||||
detailedStats.incrValue(sample, truth, called);
|
||||
|
||||
// print out interesting sites
|
||||
/*
|
||||
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
|
||||
|
|
@ -354,33 +317,6 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
// determine allele count concordance () // this is really a FN rate estimate -- CH
|
||||
if (validationIsValidVC && validation.isPolymorphic()) {
|
||||
int trueAlleleCount = 0;
|
||||
for (final Allele a : validation.getAlternateAlleles()) {
|
||||
trueAlleleCount += validation.getChromosomeCount(a);
|
||||
}
|
||||
if (eval != null) {
|
||||
alleleFreqStats.incrementFoundCount(trueAlleleCount);
|
||||
} else {
|
||||
alleleFreqStats.incrementMissedCount(trueAlleleCount);
|
||||
}
|
||||
}
|
||||
|
||||
// TP & FP quality score histograms
|
||||
if( eval != null && eval.isPolymorphic() && validationIsValidVC ) {
|
||||
if( eval.getGenotypes().keySet().size() == 1 ) { // single sample calls
|
||||
for( final String sample : eval.getGenotypes().keySet() ) { // only one sample
|
||||
if( validation.hasGenotype(sample) ) {
|
||||
final Genotype truth = validation.getGenotype(sample);
|
||||
qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), !truth.isHomRef() );
|
||||
}
|
||||
}
|
||||
} else { // multi sample calls
|
||||
qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), validation.isPolymorphic() );
|
||||
}
|
||||
}
|
||||
|
||||
return interesting;
|
||||
}
|
||||
|
||||
|
|
@ -389,16 +325,8 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
if( qualityScoreHistograms != null ) {
|
||||
qualityScoreHistograms.organizeHistogramTables();
|
||||
}
|
||||
|
||||
if( sampleSummaryStats != null && sampleStats != null ) {
|
||||
sampleSummaryStats.generateSampleSummaryStats( sampleStats );
|
||||
}
|
||||
|
||||
if ( alleleCountSummary != null && alleleCountStats != null ) {
|
||||
alleleCountSummary.generateSampleSummaryStats( alleleCountStats );
|
||||
if( simplifiedStats != null && detailedStats != null ) {
|
||||
simplifiedStats.generateSampleSummaryStats(detailedStats);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
@Analysis(description = "Ti/Tv Variant Evaluator")
|
||||
public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval {
|
||||
|
|
@ -23,6 +24,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
long nTvInComp = 0;
|
||||
@DataPoint(description = "the transition to transversion ratio for comp sites")
|
||||
double TiTvRatioStandard = 0.0;
|
||||
@DataPoint(description = "number of derived transition loci")
|
||||
long nTiDerived = 0;
|
||||
@DataPoint(description = "number of derived transversion loci")
|
||||
long nTvDerived = 0;
|
||||
@DataPoint(description = "the derived transition to transversion ratio")
|
||||
double tiTvDerivedRatio = 0.0;
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
|
|
@ -41,6 +48,21 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
if (updateStandard) nTvInComp++;
|
||||
else nTv++;
|
||||
}
|
||||
|
||||
String refStr = vc.getReference().getBaseString().toUpperCase();
|
||||
String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
|
||||
|
||||
if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
|
||||
BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
|
||||
|
||||
//System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
|
||||
|
||||
if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
|
||||
nTiDerived++;
|
||||
} else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
|
||||
nTvDerived++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -55,6 +77,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
public void finalizeEvaluation() {
|
||||
// The ti/tv ratios are not calculated per-variant, so they are set here from the accumulated
// transition/transversion counts via rate(): all sites (nTi, nTv), ancestral-allele-derived
// sites (nTiDerived, nTvDerived), and comp sites (nTiInComp, nTvInComp).
|
||||
this.tiTvRatio = rate(nTi,nTv);
|
||||
this.tiTvDerivedRatio = rate(nTiDerived,nTvDerived);
|
||||
this.TiTvRatioStandard = rate(nTiInComp, nTvInComp);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,11 +8,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc
|
|||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
public class Degeneracy extends VariantStratifier {
|
||||
private ArrayList<String> states;
|
||||
|
||||
private HashMap<String, String> degeneracies;
|
||||
private HashMap<String, HashMap<Integer, String>> degeneracies;
|
||||
|
||||
@Override
|
||||
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
|
||||
|
|
@ -24,29 +25,58 @@ public class Degeneracy extends VariantStratifier {
|
|||
states.add("6-fold");
|
||||
states.add("all");
|
||||
|
||||
degeneracies = new HashMap<String, String>();
|
||||
degeneracies.put("Ile", "3-fold");
|
||||
degeneracies.put("Leu", "6-fold");
|
||||
degeneracies.put("Val", "4-fold");
|
||||
degeneracies.put("Phe", "2-fold");
|
||||
degeneracies.put("Met", "1-fold");
|
||||
degeneracies.put("Cys", "2-fold");
|
||||
degeneracies.put("Ala", "4-fold");
|
||||
degeneracies.put("Gly", "4-fold");
|
||||
degeneracies.put("Pro", "4-fold");
|
||||
degeneracies.put("Thr", "4-fold");
|
||||
degeneracies.put("Ser", "6-fold");
|
||||
degeneracies.put("Tyr", "2-fold");
|
||||
degeneracies.put("Try", "1-fold");
|
||||
degeneracies.put("Trp", "1-fold");
|
||||
degeneracies.put("Gln", "2-fold");
|
||||
degeneracies.put("Asn", "2-fold");
|
||||
degeneracies.put("His", "2-fold");
|
||||
degeneracies.put("Glu", "2-fold");
|
||||
degeneracies.put("Asp", "2-fold");
|
||||
degeneracies.put("Lys", "2-fold");
|
||||
degeneracies.put("Arg", "6-fold");
|
||||
degeneracies.put("Stop", "3-fold");
|
||||
HashMap<String, String[]> aminoAcids = new HashMap<String, String[]>();
|
||||
aminoAcids.put("Ile", new String[]{"ATT", "ATC", "ATA"});
|
||||
aminoAcids.put("Leu", new String[]{"CTT", "CTC", "CTA", "CTG", "TTA", "TTG"});
|
||||
aminoAcids.put("Val", new String[]{"GTT", "GTC", "GTA", "GTG"});
|
||||
aminoAcids.put("Phe", new String[]{"TTT", "TTC"});
|
||||
aminoAcids.put("Met", new String[]{"ATG"});
|
||||
aminoAcids.put("Cys", new String[]{"TGT", "TGC"});
|
||||
aminoAcids.put("Ala", new String[]{"GCT", "GCC", "GCA", "GCG"});
|
||||
aminoAcids.put("Gly", new String[]{"GGT", "GGC", "GGA", "GGG"});
|
||||
aminoAcids.put("Pro", new String[]{"CCT", "CCC", "CCA", "CCG"});
|
||||
aminoAcids.put("Thr", new String[]{"ACT", "ACC", "ACA", "ACG"});
|
||||
aminoAcids.put("Ser", new String[]{"TCT", "TCC", "TCA", "TCG", "AGT", "AGC"});
|
||||
aminoAcids.put("Tyr", new String[]{"TAT", "TAC"});
|
||||
aminoAcids.put("Trp", new String[]{"TGG"});
|
||||
aminoAcids.put("Glu", new String[]{"CAA", "CAG"});
|
||||
aminoAcids.put("Asn", new String[]{"AAT", "AAC"});
|
||||
aminoAcids.put("His", new String[]{"CAT", "CAC"});
|
||||
aminoAcids.put("Gln", new String[]{"GAA", "GAG"});
|
||||
aminoAcids.put("Asp", new String[]{"GAT", "GAC"});
|
||||
aminoAcids.put("Lys", new String[]{"AAA", "AAG"});
|
||||
aminoAcids.put("Arg", new String[]{"CGT", "CGC", "CGA", "CGG", "AGA", "AGG"});
|
||||
aminoAcids.put("Stop", new String[]{"TAA", "TAG", "TGA"});
|
||||
|
||||
degeneracies = new HashMap<String, HashMap<Integer, String>>();
|
||||
|
||||
for (String aminoAcid : aminoAcids.keySet()) {
|
||||
String[] codons = aminoAcids.get(aminoAcid);
|
||||
|
||||
for (int pos = 0; pos < 3; pos++) {
|
||||
HashSet<Character> alleles = new HashSet<Character>();
|
||||
|
||||
for (String codon : codons) {
|
||||
alleles.add(codon.charAt(pos));
|
||||
}
|
||||
|
||||
String degeneracy;
|
||||
switch (alleles.size()) {
|
||||
case 1: degeneracy = "1-fold"; break;
|
||||
case 2: degeneracy = "2-fold"; break;
|
||||
case 3: degeneracy = "3-fold"; break;
|
||||
case 4: degeneracy = "4-fold"; break;
|
||||
case 6: degeneracy = "6-fold"; break;
|
||||
default: degeneracy = "1-fold"; break;
|
||||
}
|
||||
|
||||
if (!degeneracies.containsKey(aminoAcid)) {
|
||||
degeneracies.put(aminoAcid, new HashMap<Integer, String>());
|
||||
}
|
||||
|
||||
degeneracies.get(aminoAcid).put(pos, degeneracy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
|
|
@ -61,9 +91,11 @@ public class Degeneracy extends VariantStratifier {
|
|||
if (eval != null && eval.isVariant()) {
|
||||
String type = null;
|
||||
String aa = null;
|
||||
Integer frame = null;
|
||||
|
||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||
aa = eval.getAttributeAsString("refseq.variantAA");
|
||||
frame = eval.getAttributeAsInt("refseq.frame");
|
||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||
int annotationId = 1;
|
||||
String key;
|
||||
|
|
@ -82,14 +114,22 @@ public class Degeneracy extends VariantStratifier {
|
|||
|
||||
String aakey = String.format("refseq.variantAA_%d", annotationId);
|
||||
aa = eval.getAttributeAsString(aakey);
|
||||
|
||||
if (aa != null) {
|
||||
String framekey = String.format("refseq.frame_%d", annotationId);
|
||||
|
||||
if (eval.hasAttribute(framekey)) {
|
||||
frame = eval.getAttributeAsInt(framekey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
annotationId++;
|
||||
} while (eval.hasAttribute(key));
|
||||
}
|
||||
|
||||
if (aa != null && degeneracies.containsKey(aa)) {
|
||||
relevantStates.add(degeneracies.get(aa));
|
||||
if (aa != null && degeneracies.containsKey(aa) && frame != null) {
|
||||
relevantStates.add(degeneracies.get(aa).get(frame));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,19 +32,6 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndels() {
|
||||
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
|
||||
// variantRate = nVariantLoci / nProcessedLoci = 0.131578947
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
|
||||
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
|
||||
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -58,25 +45,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("e4545f524cc8386079dc9190de5d9bcc")
|
||||
Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() {
|
||||
// nProcessedLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }'= 3
|
||||
// nVariantLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | wc -l = 3
|
||||
// variantRateKnown = nVariantLoci / nProcessedLoci = 0.0789473684
|
||||
// nSNPsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) == 1) print $0 }' | wc -l = 3
|
||||
// nInsertionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0
|
||||
// nDeletionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 0
|
||||
// nNoCallsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 0
|
||||
// nHetsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 3
|
||||
// nHomRefKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 1
|
||||
// nHomVarKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -91,25 +66,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("3dbefb800e432fdd237d6c57e4456352")
|
||||
Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() {
|
||||
// nProcessedLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -vc PASS = 3
|
||||
// nRefLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 1
|
||||
// nVariantLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 2
|
||||
// nSNPsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 1
|
||||
// nInsertionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0
|
||||
// nDeletionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
|
||||
// nNoCallsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 3
|
||||
// nHetsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 1
|
||||
// nHomRefFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 2
|
||||
// nHomVarFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 3
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -125,25 +88,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("9fafed19a700a7d4bd7aaed2dcad37be")
|
||||
Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() {
|
||||
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep -c PASS = 8
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 3
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
|
||||
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
|
||||
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -158,7 +109,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("09ed51c3d5ac2099ded1d0e8cf8ee183")
|
||||
Arrays.asList("677fe398643e62a10d6739d36a720a12")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
|
||||
}
|
||||
|
|
@ -179,7 +130,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("9d233d3d8cec8e580acb98b1a2725b56")
|
||||
Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
|
||||
}
|
||||
|
|
@ -200,22 +151,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("850094f32657f04cb958891de4cfc5b2")
|
||||
Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() {
|
||||
// HG00513
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 3
|
||||
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) > 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 1
|
||||
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) > 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 0
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 2
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/1") print $0 }' | wc -l = 2
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/0") print $0 }' | wc -l = 3
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "1/1") print $0 }' | wc -l = 2
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -230,21 +172,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("cea071b8b0ebd8f138ba91375edf036e")
|
||||
Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() {
|
||||
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -261,21 +195,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("e2f505046251e19b5737f4999c896fe2")
|
||||
Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() {
|
||||
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -294,26 +220,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("8b97fe8e5e75efe08c080bbf47960c8f")
|
||||
Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFundamentalsCountVariantsNoCompRod() {
|
||||
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
|
||||
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9
|
||||
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4
|
||||
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
|
||||
// variantRate = nVariantLoci / nProcessedLoci = 0.131578947
|
||||
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
|
||||
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
|
||||
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
|
||||
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
|
||||
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
|
||||
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
|
||||
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
|
|
@ -326,7 +239,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("1687108ed96d1127b196c2d74cf80a49")
|
||||
Arrays.asList("d44c8f44384189a09eea85a8e89d7299")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
|
||||
}
|
||||
|
|
@ -336,84 +249,26 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
String extraArgs = "-L 1:1-10,000,000";
|
||||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||
1, Arrays.asList("ca71324abf5659964c8f9e28b8fdbb28"));
|
||||
1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
//executeTest("testSelect1", spec);
|
||||
}
|
||||
}
|
||||
|
||||
// @Test
|
||||
// public void testSelect2() {
|
||||
// String extraArgs = "-L 1:1-10,000,000";
|
||||
// WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
|
||||
// 1, Arrays.asList(""));
|
||||
// //executeTestParallel("testSelect2", spec);
|
||||
// executeTest("testSelect2", spec);
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void testVEGenotypeConcordance() {
|
||||
String vcfFiles[] = {"GenotypeConcordanceEval.vcf", "GenotypeConcordanceEval.vcf.gz"};
|
||||
for (String vcfFile : vcfFiles) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
|
||||
1,
|
||||
Arrays.asList("732d32997b19d9c4f0291287858c56d2"));
|
||||
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
//executeTest("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
String vcfFile = "GenotypeConcordanceEval.vcf";
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVESimple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000 -ST CpG", "c74067360656519f769f805d6e1ef36b");
|
||||
expectations.put("-L 1:1-10,000,000 -ST CpG -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "0fe151e00ab35f3b95d4fea651592ec3");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec( tests + " " + extraArgs + " -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTestParallel("testVESimple", spec);
|
||||
//executeTest("testVESimple", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVEComplex() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -mvq 30 -EV MendelianViolationEvaluator -ST CpG" +
|
||||
" -B:dbsnp_130,dbSNP " + GATKDataLocation + "dbsnp_130_b36.rod" +
|
||||
" -B:comp_hapmap,VCF3 " + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
|
||||
expectations.put("", "700eba07bac9fba4ed963bbbdcab0e29");
|
||||
expectations.put(" -knownName comp_hapmap -knownName dbsnp", "c2464d5613072fb326ebffcf3078ae31");
|
||||
expectations.put(" -knownName comp_hapmap", "0912f35a4c4179ff93b152d8c4e009e2");
|
||||
for (String tests : testsEnumerations) {
|
||||
for (Map.Entry<String, String> entry : expectations.entrySet()) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs1 + extraArgs2 + " -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTestParallel("testVEComplex", spec);
|
||||
//executeTest("testVEComplex", spec);
|
||||
}
|
||||
}
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
|
||||
1,
|
||||
Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1"));
|
||||
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e68272d2f3b7a6439c4949cf0e34beeb"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69"));
|
||||
executeTestParallel("testCompVsEvalAC",spec);
|
||||
//executeTest("testCompVsEvalAC",spec);
|
||||
}
|
||||
|
||||
private static String withSelect(String cmd, String select, String name) {
|
||||
|
|
@ -423,9 +278,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testTranches() {
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("4c5ef7c142427a85d1b9b1c9fe8fd3c2"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9"));
|
||||
executeTestParallel("testTranches",spec);
|
||||
//executeTest("testTranches",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -433,7 +287,6 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda"));
|
||||
executeTestParallel("testCompOverlap",spec);
|
||||
//executeTest("testCompOverlap",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -446,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -D " + dbsnp +
|
||||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("923af856d04042c0ee9d01aa9eb8675a"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9"));
|
||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -460,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("45c08af95777e1eee3e9acbf136a8b6b"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6"));
|
||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -520,57 +373,4 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
);
|
||||
executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2);
|
||||
}
|
||||
|
||||
// @Test
|
||||
// public void testVEGenomicallyAnnotated() {
|
||||
// String vecmd = "-T VariantEval" +
|
||||
// " -R " + b36KGReference +
|
||||
// " -L 21" +
|
||||
// " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||
// " -EV CountFunctionalClasses -noEV" +
|
||||
// " -B:eval,VCF " + validationDataLocation + "test.filtered.maf_annotated.vcf" +
|
||||
// " -o %s";
|
||||
// String md5 = "";
|
||||
//
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(vecmd, 1, Arrays.asList(md5));
|
||||
// executeTestParallel("testVEGenomicallyAnnotated", spec);
|
||||
// //executeTest("testVEGenomicallyAnnotated", spec);
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testVEWriteVCF() {
|
||||
// String extraArgs = "-L 1:1-10,000,000 -NO_HEADER -family NA19238+NA19239=NA19240 -mvq 30 -EV MendelianViolationEvaluator";
|
||||
// for (String tests : testsEnumerations) {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s -NO_HEADER",
|
||||
// 2,
|
||||
// Arrays.asList("50321436a65ef7d574286cb0a1c55f7e", "d4bdd06ed5cb1aff1dfee8b69d5d17b8"));
|
||||
// executeTestParallel("testVEWriteVCF", spec);
|
||||
// //executeTest("testVEWriteVCF", spec);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testVEValidatePass() {
|
||||
// String extraArgs = "-L 1:1-10,000,000";
|
||||
// for (String tests : testsEnumerations) {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(withValidateTiTv(withSelect(tests, "DP < 50", "DP50"), 1.0, 4.0) + " " + extraArgs + " -o %s",
|
||||
// 1, Arrays.asList("8a0203f0533b628ad7f1f230a43f105f"));
|
||||
// executeTestParallel("testVEValidatePass", spec);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testVEValidateFail() {
|
||||
// String extraArgs = "-L 1:1-10,000,000";
|
||||
// for (String tests : testsEnumerations) {
|
||||
// WalkerTestSpec spec = new WalkerTestSpec(withValidateTiTv(withSelect(tests, "DP < 50", "DP50"), 1.0, 1.2) + " " + extraArgs + " -o %s",
|
||||
// 1, UserException.class);
|
||||
// executeTestParallel("testVEValidateFail", spec);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static String withValidateTiTv(String cmd, double min, double max) {
|
||||
// return String.format("%s -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio >= %2$s' -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio <= %3$s'", cmd, min, max);
|
||||
// }
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue