Changed the definition of degeneracy (it's at the site level — the degeneracy of a position within a codon, not the degeneracy of the amino acid itself, as I initially thought). Added the ability to supply an ancestral allele track (available in /humgen/gsa-hpprojects/GATK/data/Ancestor/).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5963 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2011-06-08 15:07:31 +00:00
parent d784dac495
commit 49b021d435
6 changed files with 182 additions and 331 deletions

View File

@ -30,6 +30,13 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import net.sf.picard.reference.FastaSequenceFile;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.fasta.FastaSequence;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import net.sf.picard.reference.ReferenceSequence;
import java.io.FileNotFoundException;
import java.io.File;
import java.io.PrintStream;
@ -95,6 +102,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
private String TRANCHE_FILENAME = null;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
private File ancestralAlignmentsFile = null;
// Variables
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
private Set<String> compNames = new TreeSet<String>();
@ -120,6 +130,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Utility class
private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this);
// Ancestral alignments
private IndexedFastaSequenceFile ancestralAlignments = null;
/**
* Initialize the stratifications, evaluations, evaluation contexts, and reporting object
*/
@ -165,7 +178,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
// Initialize select expressions
//jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
jexlExpressions.add(sjexl);
@ -190,6 +202,15 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Initialize report table
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects);
// Load ancestral alignments
if (ancestralAlignmentsFile != null) {
try {
ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile);
} catch (FileNotFoundException e) {
throw new ReviewedStingException(String.format("The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath()));
}
}
}
/**
@ -204,6 +225,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
if (tracker != null) {
String aastr = (ancestralAlignments == null) ? null : new String(ancestralAlignments.getSubsequenceAt(ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()).getBases());
// track sample vc
HashMap<String, HashMap<String, VariantContext>> vcs = variantEvalUtils.getVariantContexts(tracker, ref, compNames, evalNames, typesToUse != null);
@ -220,6 +243,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// if ( eval != null ) logger.info("Keeping " + eval);
}
if (eval != null && aastr != null) {
HashMap<String, Object> newAts = new HashMap<String, Object>(eval.getAttributes());
newAts.put("ANCESTRALALLELE", aastr);
eval = VariantContext.modifyAttributes(eval, newAts);
}
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
for ( VariantStratifier vs : stratificationObjects ) {
ArrayList<String> states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName);

View File

@ -53,6 +53,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
public long nHomVar = 0;
@DataPoint(description = "Number of singletons")
public long nSingletons = 0;
@DataPoint(description = "Number of derived homozygotes")
public long nHomDerived = 0;
// calculations that get set in the finalizeEvaluation method
@DataPoint(description = "heterozygosity per locus rate")
@ -115,19 +117,47 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
}
String refStr = vc1.getReference().getBaseString().toUpperCase();
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
// if (aaStr.equals(".")) {
// aaStr = refStr;
// }
// Classification of homozygous genotypes relative to the ancestral allele (aa):
//   ref  aa  allele  class
//   A    C   A       derived homozygote   (carried allele differs from the ancestral allele)
//   A    C   C       ancestral homozygote (carried allele matches the ancestral allele)
//   A    A   A       reference (= ancestral) homozygote
//   A    A   C       derived homozygote
for (Genotype g : vc1.getGenotypes().values()) {
String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null;
switch (g.getType()) {
case NO_CALL:
nNoCalls++;
break;
case HOM_REF:
nHomRef++;
if ( aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr) ) {
nHomDerived++;
}
break;
case HET:
nHets++;
break;
case HOM_VAR:
nHomVar++;
if ( aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr) ) {
nHomDerived++;
}
break;
default:
throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFConstants;
@ -48,27 +47,13 @@ public class GenotypeConcordance extends VariantEvaluator {
protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class);
// a mapping from allele count to stats
@DataPoint(description = "the frequency statistics for each allele")
FrequencyStats alleleFreqStats = new FrequencyStats();
// a mapping from sample to stats
@DataPoint(description = "the concordance statistics for each sample")
SampleStats sampleStats = null;
@DataPoint(description = "the detailed concordance statistics for each sample")
SampleStats detailedStats = null;
// a mapping from sample to stats summary
@DataPoint(description = "the concordance statistics summary for each sample")
SampleSummaryStats sampleSummaryStats = null;
// two histograms of variant quality scores, for true positive and false positive calls
@DataPoint(description = "the variant quality score histograms for true positive and false positive calls")
QualityScoreHistograms qualityScoreHistograms = null;
@DataPoint(description = "the concordance statistics summary by allele count")
ACSummaryStats alleleCountSummary = null;
@DataPoint(description = "the concordance statistics by allele count")
ACStats alleleCountStats = null;
@DataPoint(description = "the simplified concordance statistics for each sample")
SampleSummaryStats simplifiedStats = null;
private static final int MAX_MISSED_VALIDATION_DATA = 100;
@ -253,27 +238,11 @@ public class GenotypeConcordance extends VariantEvaluator {
return interesting;
}
if( qualityScoreHistograms == null ) {
qualityScoreHistograms = new QualityScoreHistograms();
}
if ( alleleCountStats == null && eval != null && validation != null && validation.getSampleNames().size() > 0) {
alleleCountStats = new ACStats(eval,validation,Genotype.Type.values().length);
alleleCountSummary = new ACSummaryStats(eval, validation);
}
if ( alleleCountStats != null ) {
// for ( int i = 0; i <= 2*validation.getGenotypes().size(); i++ ) {
// concordanceStats.put(String.format("compAC%d",i), new long[nGenotypeTypes][nGenotypeTypes]);
// rowKeys[1+2*evalvc.getGenotypes().size()+i] = String.format("compAC%d",i);
// }
}
if (sampleStats == null) {
if (detailedStats == null) {
if (eval != null) {
// initialize the concordance table
sampleStats = new SampleStats(eval,Genotype.Type.values().length);
sampleSummaryStats = new SampleSummaryStats(eval);
detailedStats = new SampleStats(eval,Genotype.Type.values().length);
simplifiedStats = new SampleSummaryStats(eval);
for (final VariantContext vc : missedValidationData) {
determineStats(null, vc);
}
@ -323,11 +292,7 @@ public class GenotypeConcordance extends VariantEvaluator {
}
}
sampleStats.incrValue(sample, truth, called);
if ( evalAC != null && validationAC != null) {
alleleCountStats.incrValue(evalAC,truth,called);
alleleCountStats.incrValue(validationAC,truth,called);
}
detailedStats.incrValue(sample, truth, called);
}
}
// otherwise, mark no-calls for all samples
@ -336,10 +301,8 @@ public class GenotypeConcordance extends VariantEvaluator {
for (final String sample : validation.getGenotypes().keySet()) {
final Genotype.Type truth = validation.getGenotype(sample).getType();
sampleStats.incrValue(sample, truth, called);
if ( evalAC != null ) {
alleleCountStats.incrValue(evalAC,truth,called);
}
detailedStats.incrValue(sample, truth, called);
// print out interesting sites
/*
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
@ -354,33 +317,6 @@ public class GenotypeConcordance extends VariantEvaluator {
}
}
// determine allele count concordance () // this is really a FN rate estimate -- CH
if (validationIsValidVC && validation.isPolymorphic()) {
int trueAlleleCount = 0;
for (final Allele a : validation.getAlternateAlleles()) {
trueAlleleCount += validation.getChromosomeCount(a);
}
if (eval != null) {
alleleFreqStats.incrementFoundCount(trueAlleleCount);
} else {
alleleFreqStats.incrementMissedCount(trueAlleleCount);
}
}
// TP & FP quality score histograms
if( eval != null && eval.isPolymorphic() && validationIsValidVC ) {
if( eval.getGenotypes().keySet().size() == 1 ) { // single sample calls
for( final String sample : eval.getGenotypes().keySet() ) { // only one sample
if( validation.hasGenotype(sample) ) {
final Genotype truth = validation.getGenotype(sample);
qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), !truth.isHomRef() );
}
}
} else { // multi sample calls
qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), validation.isPolymorphic() );
}
}
return interesting;
}
@ -389,16 +325,8 @@ public class GenotypeConcordance extends VariantEvaluator {
}
public void finalizeEvaluation() {
if( qualityScoreHistograms != null ) {
qualityScoreHistograms.organizeHistogramTables();
}
if( sampleSummaryStats != null && sampleStats != null ) {
sampleSummaryStats.generateSampleSummaryStats( sampleStats );
}
if ( alleleCountSummary != null && alleleCountStats != null ) {
alleleCountSummary.generateSampleSummaryStats( alleleCountStats );
if( simplifiedStats != null && detailedStats != null ) {
simplifiedStats.generateSampleSummaryStats(detailedStats);
}
}

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
import org.broadinstitute.sting.utils.BaseUtils;
@Analysis(description = "Ti/Tv Variant Evaluator")
public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval {
@ -23,6 +24,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
long nTvInComp = 0;
@DataPoint(description = "the transition to transversion ratio for comp sites")
double TiTvRatioStandard = 0.0;
@DataPoint(description = "number of derived transition loci")
long nTiDerived = 0;
@DataPoint(description = "number of derived transversion loci")
long nTvDerived = 0;
@DataPoint(description = "the derived transition to transversion ratio")
double tiTvDerivedRatio = 0.0;
public boolean enabled() {
return true;
@ -41,6 +48,21 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
if (updateStandard) nTvInComp++;
else nTv++;
}
String refStr = vc.getReference().getBaseString().toUpperCase();
String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
//System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
nTiDerived++;
} else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
nTvDerived++;
}
}
}
}
@ -55,6 +77,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
public void finalizeEvaluation() {
    // Each of these ratios is a quotient of counters accumulated over the
    // entire run, so they can only be computed here, after the last variant
    // has been processed (not per-variant).
    this.TiTvRatioStandard = rate(nTiInComp, nTvInComp);  // comp-track sites
    this.tiTvRatio = rate(nTi, nTv);                      // eval-track sites
    this.tiTvDerivedRatio = rate(nTiDerived, nTvDerived); // derived-allele sites
}
}

View File

@ -8,11 +8,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
public class Degeneracy extends VariantStratifier {
private ArrayList<String> states;
private HashMap<String, String> degeneracies;
private HashMap<String, HashMap<Integer, String>> degeneracies;
@Override
public void initialize(Set<SortableJexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames, Set<String> contigNames) {
@ -24,29 +25,58 @@ public class Degeneracy extends VariantStratifier {
states.add("6-fold");
states.add("all");
degeneracies = new HashMap<String, String>();
degeneracies.put("Ile", "3-fold");
degeneracies.put("Leu", "6-fold");
degeneracies.put("Val", "4-fold");
degeneracies.put("Phe", "2-fold");
degeneracies.put("Met", "1-fold");
degeneracies.put("Cys", "2-fold");
degeneracies.put("Ala", "4-fold");
degeneracies.put("Gly", "4-fold");
degeneracies.put("Pro", "4-fold");
degeneracies.put("Thr", "4-fold");
degeneracies.put("Ser", "6-fold");
degeneracies.put("Tyr", "2-fold");
degeneracies.put("Try", "1-fold");
degeneracies.put("Trp", "1-fold");
degeneracies.put("Gln", "2-fold");
degeneracies.put("Asn", "2-fold");
degeneracies.put("His", "2-fold");
degeneracies.put("Glu", "2-fold");
degeneracies.put("Asp", "2-fold");
degeneracies.put("Lys", "2-fold");
degeneracies.put("Arg", "6-fold");
degeneracies.put("Stop", "3-fold");
HashMap<String, String[]> aminoAcids = new HashMap<String, String[]>();
aminoAcids.put("Ile", new String[]{"ATT", "ATC", "ATA"});
aminoAcids.put("Leu", new String[]{"CTT", "CTC", "CTA", "CTG", "TTA", "TTG"});
aminoAcids.put("Val", new String[]{"GTT", "GTC", "GTA", "GTG"});
aminoAcids.put("Phe", new String[]{"TTT", "TTC"});
aminoAcids.put("Met", new String[]{"ATG"});
aminoAcids.put("Cys", new String[]{"TGT", "TGC"});
aminoAcids.put("Ala", new String[]{"GCT", "GCC", "GCA", "GCG"});
aminoAcids.put("Gly", new String[]{"GGT", "GGC", "GGA", "GGG"});
aminoAcids.put("Pro", new String[]{"CCT", "CCC", "CCA", "CCG"});
aminoAcids.put("Thr", new String[]{"ACT", "ACC", "ACA", "ACG"});
aminoAcids.put("Ser", new String[]{"TCT", "TCC", "TCA", "TCG", "AGT", "AGC"});
aminoAcids.put("Tyr", new String[]{"TAT", "TAC"});
aminoAcids.put("Trp", new String[]{"TGG"});
aminoAcids.put("Glu", new String[]{"CAA", "CAG"});
aminoAcids.put("Asn", new String[]{"AAT", "AAC"});
aminoAcids.put("His", new String[]{"CAT", "CAC"});
aminoAcids.put("Gln", new String[]{"GAA", "GAG"});
aminoAcids.put("Asp", new String[]{"GAT", "GAC"});
aminoAcids.put("Lys", new String[]{"AAA", "AAG"});
aminoAcids.put("Arg", new String[]{"CGT", "CGC", "CGA", "CGG", "AGA", "AGG"});
aminoAcids.put("Stop", new String[]{"TAA", "TAG", "TGA"});
degeneracies = new HashMap<String, HashMap<Integer, String>>();
for (String aminoAcid : aminoAcids.keySet()) {
String[] codons = aminoAcids.get(aminoAcid);
for (int pos = 0; pos < 3; pos++) {
HashSet<Character> alleles = new HashSet<Character>();
for (String codon : codons) {
alleles.add(codon.charAt(pos));
}
String degeneracy;
switch (alleles.size()) {
case 1: degeneracy = "1-fold"; break;
case 2: degeneracy = "2-fold"; break;
case 3: degeneracy = "3-fold"; break;
case 4: degeneracy = "4-fold"; break;
case 6: degeneracy = "6-fold"; break;
default: degeneracy = "1-fold"; break;
}
if (!degeneracies.containsKey(aminoAcid)) {
degeneracies.put(aminoAcid, new HashMap<Integer, String>());
}
degeneracies.get(aminoAcid).put(pos, degeneracy);
}
}
}
public ArrayList<String> getAllStates() {
@ -61,9 +91,11 @@ public class Degeneracy extends VariantStratifier {
if (eval != null && eval.isVariant()) {
String type = null;
String aa = null;
Integer frame = null;
if (eval.hasAttribute("refseq.functionalClass")) {
aa = eval.getAttributeAsString("refseq.variantAA");
frame = eval.getAttributeAsInt("refseq.frame");
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
int annotationId = 1;
String key;
@ -82,14 +114,22 @@ public class Degeneracy extends VariantStratifier {
String aakey = String.format("refseq.variantAA_%d", annotationId);
aa = eval.getAttributeAsString(aakey);
if (aa != null) {
String framekey = String.format("refseq.frame_%d", annotationId);
if (eval.hasAttribute(framekey)) {
frame = eval.getAttributeAsInt(framekey);
}
}
}
annotationId++;
} while (eval.hasAttribute(key));
}
if (aa != null && degeneracies.containsKey(aa)) {
relevantStates.add(degeneracies.get(aa));
if (aa != null && degeneracies.containsKey(aa) && frame != null) {
relevantStates.add(degeneracies.get(aa).get(frame));
}
}

View File

@ -32,19 +32,6 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testFundamentalsCountVariantsSNPsAndIndels() {
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
// variantRate = nVariantLoci / nProcessedLoci = 0.131578947
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -58,25 +45,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e4545f524cc8386079dc9190de5d9bcc")
Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2")
);
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
}
@Test
public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() {
// nProcessedLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
// nCalledLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }'= 3
// nVariantLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | wc -l = 3
// variantRateKnown = nVariantLoci / nProcessedLoci = 0.0789473684
// nSNPsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) == 1) print $0 }' | wc -l = 3
// nInsertionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0
// nDeletionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 0
// nNoCallsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 0
// nHetsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 3
// nHomRefKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 1
// nHomVarKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -91,25 +66,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("3dbefb800e432fdd237d6c57e4456352")
Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@Test
public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() {
// nProcessedLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
// nCalledLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -vc PASS = 3
// nRefLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 1
// nVariantLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 2
// nSNPsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 1
// nInsertionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0
// nDeletionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
// nNoCallsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 3
// nHetsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 1
// nHomRefFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 2
// nHomVarFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 3
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -125,25 +88,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("9fafed19a700a7d4bd7aaed2dcad37be")
Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}
@Test
public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() {
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep -c PASS = 8
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 3
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -158,7 +109,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("09ed51c3d5ac2099ded1d0e8cf8ee183")
Arrays.asList("677fe398643e62a10d6739d36a720a12")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
}
@ -179,7 +130,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("9d233d3d8cec8e580acb98b1a2725b56")
Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
}
@ -200,22 +151,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("850094f32657f04cb958891de4cfc5b2")
Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
}
@Test
public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() {
// HG00513
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 3
// nInsertions = $ grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) > 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 1
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) > 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 0
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 2
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/1") print $0 }' | wc -l = 2
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/0") print $0 }' | wc -l = 3
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "1/1") print $0 }' | wc -l = 2
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -230,21 +172,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("cea071b8b0ebd8f138ba91375edf036e")
Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
}
@Test
public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() {
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -261,21 +195,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e2f505046251e19b5737f4999c896fe2")
Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
}
@Test
public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() {
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -294,26 +220,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("8b97fe8e5e75efe08c080bbf47960c8f")
Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
}
@Test
public void testFundamentalsCountVariantsNoCompRod() {
// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38
// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9
// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4
// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5
// variantRate = nVariantLoci / nProcessedLoci = 0.131578947
// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3
// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1
// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1
// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4
// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8
// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10
// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
@ -326,7 +239,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("1687108ed96d1127b196c2d74cf80a49")
Arrays.asList("d44c8f44384189a09eea85a8e89d7299")
);
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
}
@ -336,84 +249,26 @@ public class VariantEvalIntegrationTest extends WalkerTest {
String extraArgs = "-L 1:1-10,000,000";
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("ca71324abf5659964c8f9e28b8fdbb28"));
1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec"));
executeTestParallel("testSelect1", spec);
//executeTest("testSelect1", spec);
}
}
// @Test
// public void testSelect2() {
// String extraArgs = "-L 1:1-10,000,000";
// WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
// 1, Arrays.asList(""));
// //executeTestParallel("testSelect2", spec);
// executeTest("testSelect2", spec);
// }
@Test
public void testVEGenotypeConcordance() {
    // Run the GenotypeConcordance evaluator against both the plain and the
    // bgzipped copy of the same eval VCF; both runs must produce identical
    // output (same MD5), proving the gz codec path matches the plain path.
    String[] vcfFiles = {"GenotypeConcordanceEval.vcf", "GenotypeConcordanceEval.vcf.gz"};
    for (String vcfFile : vcfFiles) {
        WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
                1,
                Arrays.asList("732d32997b19d9c4f0291287858c56d2"));
        executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
        //executeTest("testVEGenotypeConcordance" + vcfFile, spec);
    }
    // NOTE(review): removed a dead trailing `String vcfFile = ...` declaration
    // that was never read after the loop (leftover from an earlier revision).
}
@Test
public void testVESimple() {
    // Maps extra VariantEval command-line arguments to the expected output MD5.
    Map<String, String> argsToMd5 = new HashMap<String, String>();
    argsToMd5.put("-L 1:1-10,000,000 -ST CpG", "c74067360656519f769f805d6e1ef36b");
    argsToMd5.put("-L 1:1-10,000,000 -ST CpG -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "0fe151e00ab35f3b95d4fea651592ec3");

    // Each expectation is exercised once per enumerated base test command.
    for (Map.Entry<String, String> expectation : argsToMd5.entrySet()) {
        for (String baseCommand : testsEnumerations) {
            String commandLine = baseCommand + " " + expectation.getKey() + " -o %s";
            WalkerTestSpec spec = new WalkerTestSpec(commandLine,
                    1, // just one output file
                    Arrays.asList(expectation.getValue()));
            executeTestParallel("testVESimple", spec);
        }
    }
}
@Test
public void testVEComplex() {
    // Maps additional argument suffixes to the expected output MD5; the
    // common argument prefix (intervals, trio, dbSNP and HapMap comps) is
    // shared across all three expectations.
    HashMap<String, String> expectations = new HashMap<String, String>();
    String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -mvq 30 -EV MendelianViolationEvaluator -ST CpG" +
            " -B:dbsnp_130,dbSNP " + GATKDataLocation + "dbsnp_130_b36.rod" +
            " -B:comp_hapmap,VCF3 " + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
    expectations.put("", "700eba07bac9fba4ed963bbbdcab0e29");
    expectations.put(" -knownName comp_hapmap -knownName dbsnp", "c2464d5613072fb326ebffcf3078ae31");
    expectations.put(" -knownName comp_hapmap", "0912f35a4c4179ff93b152d8c4e009e2");
    for (String tests : testsEnumerations) {
        for (Map.Entry<String, String> entry : expectations.entrySet()) {
            String extraArgs2 = entry.getKey();
            String md5 = entry.getValue();
            WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs1 + extraArgs2 + " -o %s",
                    1, // just one output file
                    Arrays.asList(md5));
            executeTestParallel("testVEComplex", spec);
            //executeTest("testVEComplex", spec);
        }
    }
    // NOTE(review): removed stray trailing statements that redeclared `spec`
    // and referenced an out-of-scope `vcfFile` (merge residue belonging to
    // testVEGenotypeConcordance) — they would not compile here.
}
@Test
public void testCompVsEvalAC() {
    // GenotypeConcordance with an eval track compared against a comp track
    // carrying fabricated AC annotations (YRI trio subset).
    String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
    // NOTE(review): dropped a duplicate `spec` declaration carrying the stale
    // pre-change MD5 (e68272...) — two declarations of the same local would
    // not compile; the current expected MD5 is kept.
    WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69"));
    executeTestParallel("testCompVsEvalAC",spec);
    //executeTest("testCompVsEvalAC",spec);
}
private static String withSelect(String cmd, String select, String name) {
@ -423,9 +278,8 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testTranches() {
    // TiTvVariantEvaluator restricted to chr1, cutting the data at the
    // thresholds described in the supplied tranches file (-tf).
    String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
    // NOTE(review): dropped a duplicate `spec` declaration carrying the stale
    // pre-change MD5 (4c5ef7...) — redeclaring the local would not compile;
    // the current expected MD5 is kept.
    WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9"));
    executeTestParallel("testTranches",spec);
    //executeTest("testTranches",spec);
}
@Test
@ -433,7 +287,6 @@ public class VariantEvalIntegrationTest extends WalkerTest {
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda"));
executeTestParallel("testCompOverlap",spec);
//executeTest("testCompOverlap",spec);
}
@Test
@ -446,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -D " + dbsnp +
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("923af856d04042c0ee9d01aa9eb8675a"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -460,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("45c08af95777e1eee3e9acbf136a8b6b"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -520,57 +373,4 @@ public class VariantEvalIntegrationTest extends WalkerTest {
);
executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2);
}
// @Test
// public void testVEGenomicallyAnnotated() {
// String vecmd = "-T VariantEval" +
// " -R " + b36KGReference +
// " -L 21" +
// " -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
// " -EV CountFunctionalClasses -noEV" +
// " -B:eval,VCF " + validationDataLocation + "test.filtered.maf_annotated.vcf" +
// " -o %s";
// String md5 = "";
//
// WalkerTestSpec spec = new WalkerTestSpec(vecmd, 1, Arrays.asList(md5));
// executeTestParallel("testVEGenomicallyAnnotated", spec);
// //executeTest("testVEGenomicallyAnnotated", spec);
// }
//
// @Test
// public void testVEWriteVCF() {
// String extraArgs = "-L 1:1-10,000,000 -NO_HEADER -family NA19238+NA19239=NA19240 -mvq 30 -EV MendelianViolationEvaluator";
// for (String tests : testsEnumerations) {
// WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s -NO_HEADER",
// 2,
// Arrays.asList("50321436a65ef7d574286cb0a1c55f7e", "d4bdd06ed5cb1aff1dfee8b69d5d17b8"));
// executeTestParallel("testVEWriteVCF", spec);
// //executeTest("testVEWriteVCF", spec);
// }
// }
//
// @Test
// public void testVEValidatePass() {
// String extraArgs = "-L 1:1-10,000,000";
// for (String tests : testsEnumerations) {
// WalkerTestSpec spec = new WalkerTestSpec(withValidateTiTv(withSelect(tests, "DP < 50", "DP50"), 1.0, 4.0) + " " + extraArgs + " -o %s",
// 1, Arrays.asList("8a0203f0533b628ad7f1f230a43f105f"));
// executeTestParallel("testVEValidatePass", spec);
// }
// }
//
// @Test
// public void testVEValidateFail() {
// String extraArgs = "-L 1:1-10,000,000";
// for (String tests : testsEnumerations) {
// WalkerTestSpec spec = new WalkerTestSpec(withValidateTiTv(withSelect(tests, "DP < 50", "DP50"), 1.0, 1.2) + " " + extraArgs + " -o %s",
// 1, UserException.class);
// executeTestParallel("testVEValidateFail", spec);
// }
// }
//
// private static String withValidateTiTv(String cmd, double min, double max) {
// return String.format("%s -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio >= %2$s' -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio <= %3$s'", cmd, min, max);
// }
}