From 49b021d435aba82606b78f5faa9980f8db45682b Mon Sep 17 00:00:00 2001 From: kiran Date: Wed, 8 Jun 2011 15:07:31 +0000 Subject: [PATCH] Changed the definition of degeneracy (it's at the site level - degeneracy of a position in a codon, not degeneracy of the amino acid itself like I initially thought. Added the ability to supply an ancestral allele track (available in /humgen/gsa-hpprojects/GATK/data/Ancestor/). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5963 348d0f76-0448-11de-a6fe-93d51630548a --- .../varianteval/VariantEvalWalker.java | 32 ++- .../varianteval/evaluators/CountVariants.java | 30 +++ .../evaluators/GenotypeConcordance.java | 96 +------ .../evaluators/TiTvVariantEvaluator.java | 23 ++ .../stratifications/Degeneracy.java | 92 +++++-- .../VariantEvalIntegrationTest.java | 240 ++---------------- 6 files changed, 182 insertions(+), 331 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 0f5e93f46..3ac363bab 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -30,6 +30,13 @@ import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.vcf.VCFUtils; +import net.sf.picard.reference.FastaSequenceFile; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.gatk.walkers.fasta.FastaSequence; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import net.sf.picard.reference.ReferenceSequence; +import java.io.FileNotFoundException; import java.io.File; import 
java.io.PrintStream; @@ -95,6 +102,9 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false) private String TRANCHE_FILENAME = null; + @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) + private File ancestralAlignmentsFile = null; + // Variables private Set jexlExpressions = new TreeSet(); private Set compNames = new TreeSet(); @@ -120,6 +130,9 @@ public class VariantEvalWalker extends RodWalker implements Tr // Utility class private final VariantEvalUtils variantEvalUtils = new VariantEvalUtils(this); + // Ancestral alignments + private IndexedFastaSequenceFile ancestralAlignments = null; + /** * Initialize the stratifications, evaluations, evaluation contexts, and reporting object */ @@ -165,7 +178,6 @@ public class VariantEvalWalker extends RodWalker implements Tr sampleNamesForStratification.add(ALL_SAMPLE_NAME); // Initialize select expressions - //jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); jexlExpressions.add(sjexl); @@ -190,6 +202,15 @@ public class VariantEvalWalker extends RodWalker implements Tr // Initialize report table report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationObjects); + + // Load ancestral alignments + if (ancestralAlignmentsFile != null) { + try { + ancestralAlignments = new IndexedFastaSequenceFile(ancestralAlignmentsFile); + } catch (FileNotFoundException e) { + throw new ReviewedStingException(String.format("The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath())); + } + } } /** @@ -204,6 +225,8 @@ public class 
VariantEvalWalker extends RodWalker implements Tr } if (tracker != null) { + String aastr = (ancestralAlignments == null) ? null : new String(ancestralAlignments.getSubsequenceAt(ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()).getBases()); + // track sample vc HashMap> vcs = variantEvalUtils.getVariantContexts(tracker, ref, compNames, evalNames, typesToUse != null); @@ -220,6 +243,13 @@ public class VariantEvalWalker extends RodWalker implements Tr // if ( eval != null ) logger.info("Keeping " + eval); } + if (eval != null && aastr != null) { + HashMap newAts = new HashMap(eval.getAttributes()); + newAts.put("ANCESTRALALLELE", aastr); + + eval = VariantContext.modifyAttributes(eval, newAts); + } + HashMap> stateMap = new HashMap>(); for ( VariantStratifier vs : stratificationObjects ) { ArrayList states = vs.getRelevantStates(ref, tracker, comp, compName, eval, evalName, sampleName); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index d99dc7ac7..db4a11960 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -53,6 +53,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval { public long nHomVar = 0; @DataPoint(description = "Number of singletons") public long nSingletons = 0; + @DataPoint(description = "Number of derived homozygotes") + public long nHomDerived = 0; // calculations that get set in the finalizeEvaluation method @DataPoint(description = "heterozygosity per locus rate") @@ -115,19 +117,47 @@ public class CountVariants extends VariantEvaluator implements StandardEval { throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType()); } + String refStr = 
vc1.getReference().getBaseString().toUpperCase(); + + String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null; +// if (aaStr.equals(".")) { +// aaStr = refStr; +// } + + // ref aa alt class + // A C A der homozygote + // A C C anc homozygote + + // A A A ref homozygote + // A A C + // A C A + // A C C + for (Genotype g : vc1.getGenotypes().values()) { + String altStr = vc1.getAlternateAlleles().size() > 0 ? vc1.getAlternateAllele(0).getBaseString().toUpperCase() : null; + switch (g.getType()) { case NO_CALL: nNoCalls++; break; case HOM_REF: nHomRef++; + + if ( aaStr != null && altStr != null && !refStr.equalsIgnoreCase(aaStr) ) { + nHomDerived++; + } + break; case HET: nHets++; break; case HOM_VAR: nHomVar++; + + if ( aaStr != null && altStr != null && !altStr.equalsIgnoreCase(aaStr) ) { + nHomDerived++; + } + break; default: throw new ReviewedStingException("BUG: Unexpected genotype type: " + g); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 890682f02..ad8bfbbe4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; import org.apache.log4j.Logger; -import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; @@ -48,27 +47,13 @@ public class GenotypeConcordance extends VariantEvaluator { protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class); - // a mapping from allele count to stats - @DataPoint(description = "the frequency 
statistics for each allele") - FrequencyStats alleleFreqStats = new FrequencyStats(); - // a mapping from sample to stats - @DataPoint(description = "the concordance statistics for each sample") - SampleStats sampleStats = null; + @DataPoint(description = "the detailed concordance statistics for each sample") + SampleStats detailedStats = null; // a mapping from sample to stats summary - @DataPoint(description = "the concordance statistics summary for each sample") - SampleSummaryStats sampleSummaryStats = null; - - // two histograms of variant quality scores, for true positive and false positive calls - @DataPoint(description = "the variant quality score histograms for true positive and false positive calls") - QualityScoreHistograms qualityScoreHistograms = null; - - @DataPoint(description = "the concordance statistics summary by allele count") - ACSummaryStats alleleCountSummary = null; - - @DataPoint(description = "the concordance statistics by allele count") - ACStats alleleCountStats = null; + @DataPoint(description = "the simplified concordance statistics for each sample") + SampleSummaryStats simplifiedStats = null; private static final int MAX_MISSED_VALIDATION_DATA = 100; @@ -253,27 +238,11 @@ public class GenotypeConcordance extends VariantEvaluator { return interesting; } - if( qualityScoreHistograms == null ) { - qualityScoreHistograms = new QualityScoreHistograms(); - } - - if ( alleleCountStats == null && eval != null && validation != null && validation.getSampleNames().size() > 0) { - alleleCountStats = new ACStats(eval,validation,Genotype.Type.values().length); - alleleCountSummary = new ACSummaryStats(eval, validation); - } - - if ( alleleCountStats != null ) { -// for ( int i = 0; i <= 2*validation.getGenotypes().size(); i++ ) { -// concordanceStats.put(String.format("compAC%d",i), new long[nGenotypeTypes][nGenotypeTypes]); -// rowKeys[1+2*evalvc.getGenotypes().size()+i] = String.format("compAC%d",i); -// } - } - - if (sampleStats == null) { + if 
(detailedStats == null) { if (eval != null) { // initialize the concordance table - sampleStats = new SampleStats(eval,Genotype.Type.values().length); - sampleSummaryStats = new SampleSummaryStats(eval); + detailedStats = new SampleStats(eval,Genotype.Type.values().length); + simplifiedStats = new SampleSummaryStats(eval); for (final VariantContext vc : missedValidationData) { determineStats(null, vc); } @@ -323,11 +292,7 @@ public class GenotypeConcordance extends VariantEvaluator { } } - sampleStats.incrValue(sample, truth, called); - if ( evalAC != null && validationAC != null) { - alleleCountStats.incrValue(evalAC,truth,called); - alleleCountStats.incrValue(validationAC,truth,called); - } + detailedStats.incrValue(sample, truth, called); } } // otherwise, mark no-calls for all samples @@ -336,10 +301,8 @@ public class GenotypeConcordance extends VariantEvaluator { for (final String sample : validation.getGenotypes().keySet()) { final Genotype.Type truth = validation.getGenotype(sample).getType(); - sampleStats.incrValue(sample, truth, called); - if ( evalAC != null ) { - alleleCountStats.incrValue(evalAC,truth,called); - } + detailedStats.incrValue(sample, truth, called); + // print out interesting sites /* if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) { @@ -354,33 +317,6 @@ public class GenotypeConcordance extends VariantEvaluator { } } - // determine allele count concordance () // this is really a FN rate estimate -- CH - if (validationIsValidVC && validation.isPolymorphic()) { - int trueAlleleCount = 0; - for (final Allele a : validation.getAlternateAlleles()) { - trueAlleleCount += validation.getChromosomeCount(a); - } - if (eval != null) { - alleleFreqStats.incrementFoundCount(trueAlleleCount); - } else { - alleleFreqStats.incrementMissedCount(trueAlleleCount); - } - } - - // TP & FP quality score histograms - if( eval != null && eval.isPolymorphic() && validationIsValidVC ) { - if( eval.getGenotypes().keySet().size() == 1 ) { // 
single sample calls - for( final String sample : eval.getGenotypes().keySet() ) { // only one sample - if( validation.hasGenotype(sample) ) { - final Genotype truth = validation.getGenotype(sample); - qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), !truth.isHomRef() ); - } - } - } else { // multi sample calls - qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), validation.isPolymorphic() ); - } - } - return interesting; } @@ -389,16 +325,8 @@ public class GenotypeConcordance extends VariantEvaluator { } public void finalizeEvaluation() { - if( qualityScoreHistograms != null ) { - qualityScoreHistograms.organizeHistogramTables(); - } - - if( sampleSummaryStats != null && sampleStats != null ) { - sampleSummaryStats.generateSampleSummaryStats( sampleStats ); - } - - if ( alleleCountSummary != null && alleleCountStats != null ) { - alleleCountSummary.generateSampleSummaryStats( alleleCountStats ); + if( simplifiedStats != null && detailedStats != null ) { + simplifiedStats.generateSampleSummaryStats(detailedStats); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index be101c697..99b4daec8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint; +import org.broadinstitute.sting.utils.BaseUtils; @Analysis(description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { @@ 
-23,6 +24,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv long nTvInComp = 0; @DataPoint(description = "the transition to transversion ratio for comp sites") double TiTvRatioStandard = 0.0; + @DataPoint(description = "number of derived transition loci") + long nTiDerived = 0; + @DataPoint(description = "number of derived transversion loci") + long nTvDerived = 0; + @DataPoint(description = "the derived transition to transversion ratio") + double tiTvDerivedRatio = 0.0; public boolean enabled() { return true; @@ -41,6 +48,21 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv if (updateStandard) nTvInComp++; else nTv++; } + + String refStr = vc.getReference().getBaseString().toUpperCase(); + String aaStr = vc.hasAttribute("ANCESTRALALLELE") ? vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null; + + if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) { + BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]); + + //System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType); + + if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) { + nTiDerived++; + } else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) { + nTvDerived++; + } + } } } @@ -55,6 +77,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv public void finalizeEvaluation() { // the ti/tv ratio needs to be set (it's not calculated per-variant). 
this.tiTvRatio = rate(nTi,nTv); + this.tiTvDerivedRatio = rate(nTiDerived,nTvDerived); this.TiTvRatioStandard = rate(nTiInComp, nTvInComp); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index f47905b61..7c41ea58e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -8,11 +8,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc import java.util.ArrayList; import java.util.HashMap; import java.util.Set; +import java.util.HashSet; public class Degeneracy extends VariantStratifier { private ArrayList states; - private HashMap degeneracies; + private HashMap> degeneracies; @Override public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { @@ -24,29 +25,58 @@ public class Degeneracy extends VariantStratifier { states.add("6-fold"); states.add("all"); - degeneracies = new HashMap(); - degeneracies.put("Ile", "3-fold"); - degeneracies.put("Leu", "6-fold"); - degeneracies.put("Val", "4-fold"); - degeneracies.put("Phe", "2-fold"); - degeneracies.put("Met", "1-fold"); - degeneracies.put("Cys", "2-fold"); - degeneracies.put("Ala", "4-fold"); - degeneracies.put("Gly", "4-fold"); - degeneracies.put("Pro", "4-fold"); - degeneracies.put("Thr", "4-fold"); - degeneracies.put("Ser", "6-fold"); - degeneracies.put("Tyr", "2-fold"); - degeneracies.put("Try", "1-fold"); - degeneracies.put("Trp", "1-fold"); - degeneracies.put("Gln", "2-fold"); - degeneracies.put("Asn", "2-fold"); - degeneracies.put("His", "2-fold"); - degeneracies.put("Glu", "2-fold"); - degeneracies.put("Asp", "2-fold"); - degeneracies.put("Lys", "2-fold"); - degeneracies.put("Arg", "6-fold"); - degeneracies.put("Stop", 
"3-fold"); + HashMap aminoAcids = new HashMap(); + aminoAcids.put("Ile", new String[]{"ATT", "ATC", "ATA"}); + aminoAcids.put("Leu", new String[]{"CTT", "CTC", "CTA", "CTG", "TTA", "TTG"}); + aminoAcids.put("Val", new String[]{"GTT", "GTC", "GTA", "GTG"}); + aminoAcids.put("Phe", new String[]{"TTT", "TTC"}); + aminoAcids.put("Met", new String[]{"ATG"}); + aminoAcids.put("Cys", new String[]{"TGT", "TGC"}); + aminoAcids.put("Ala", new String[]{"GCT", "GCC", "GCA", "GCG"}); + aminoAcids.put("Gly", new String[]{"GGT", "GGC", "GGA", "GGG"}); + aminoAcids.put("Pro", new String[]{"CCT", "CCC", "CCA", "CCG"}); + aminoAcids.put("Thr", new String[]{"ACT", "ACC", "ACA", "ACG"}); + aminoAcids.put("Ser", new String[]{"TCT", "TCC", "TCA", "TCG", "AGT", "AGC"}); + aminoAcids.put("Tyr", new String[]{"TAT", "TAC"}); + aminoAcids.put("Trp", new String[]{"TGG"}); + aminoAcids.put("Glu", new String[]{"GAA", "GAG"}); + aminoAcids.put("Asn", new String[]{"AAT", "AAC"}); + aminoAcids.put("His", new String[]{"CAT", "CAC"}); + aminoAcids.put("Gln", new String[]{"CAA", "CAG"}); + aminoAcids.put("Asp", new String[]{"GAT", "GAC"}); + aminoAcids.put("Lys", new String[]{"AAA", "AAG"}); + aminoAcids.put("Arg", new String[]{"CGT", "CGC", "CGA", "CGG", "AGA", "AGG"}); + aminoAcids.put("Stop", new String[]{"TAA", "TAG", "TGA"}); + + degeneracies = new HashMap>(); + + for (String aminoAcid : aminoAcids.keySet()) { + String[] codons = aminoAcids.get(aminoAcid); + + for (int pos = 0; pos < 3; pos++) { + HashSet alleles = new HashSet(); + + for (String codon : codons) { + alleles.add(codon.charAt(pos)); + } + + String degeneracy; + switch (alleles.size()) { + case 1: degeneracy = "1-fold"; break; + case 2: degeneracy = "2-fold"; break; + case 3: degeneracy = "3-fold"; break; + case 4: degeneracy = "4-fold"; break; + case 6: degeneracy = "6-fold"; break; + default: degeneracy = "1-fold"; break; + } + + if (!degeneracies.containsKey(aminoAcid)) { + degeneracies.put(aminoAcid, new HashMap()); + } + + 
degeneracies.get(aminoAcid).put(pos, degeneracy); + } + } } public ArrayList getAllStates() { @@ -61,9 +91,11 @@ public class Degeneracy extends VariantStratifier { if (eval != null && eval.isVariant()) { String type = null; String aa = null; + Integer frame = null; if (eval.hasAttribute("refseq.functionalClass")) { aa = eval.getAttributeAsString("refseq.variantAA"); + frame = eval.getAttributeAsInt("refseq.frame"); } else if (eval.hasAttribute("refseq.functionalClass_1")) { int annotationId = 1; String key; @@ -82,14 +114,22 @@ public class Degeneracy extends VariantStratifier { String aakey = String.format("refseq.variantAA_%d", annotationId); aa = eval.getAttributeAsString(aakey); + + if (aa != null) { + String framekey = String.format("refseq.frame_%d", annotationId); + + if (eval.hasAttribute(framekey)) { + frame = eval.getAttributeAsInt(framekey); + } + } } annotationId++; } while (eval.hasAttribute(key)); } - if (aa != null && degeneracies.containsKey(aa)) { - relevantStates.add(degeneracies.get(aa)); + if (aa != null && degeneracies.containsKey(aa) && frame != null) { + relevantStates.add(degeneracies.get(aa).get(frame)); } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 0f046f96a..38efd50ef 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -32,19 +32,6 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testFundamentalsCountVariantsSNPsAndIndels() { -// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 -// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9 -// nRefLoci = 
grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4 -// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5 -// variantRate = nVariantLoci / nProcessedLoci = 0.131578947 -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3 -// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1 -// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 -// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4 -// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8 -// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10 -// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 - WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -58,25 +45,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e4545f524cc8386079dc9190de5d9bcc") + Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @Test public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { -// 
nProcessedLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 -// nCalledLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }'= 3 -// nVariantLociKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | wc -l = 3 -// variantRateKnown = nVariantLoci / nProcessedLoci = 0.0789473684 -// nSNPsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) == 1) print $0 }' | wc -l = 3 -// nInsertionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0 -// nDeletionsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != "." 
&& length($4) > 1) print $0 }' | wc -l = 0 -// nNoCallsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 0 -// nHetsKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 3 -// nHomRefKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 1 -// nHomVarKnown = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | grep DBSNP129 | awk '{ if ($5 != ".") print $0 }' | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 - WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -91,25 +66,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("3dbefb800e432fdd237d6c57e4456352") + Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @Test public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { -// nProcessedLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 -// nCalledLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -vc PASS = 3 -// nRefLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 1 -// nVariantLociFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != ".") print $0 }' | wc -l 
= 2 -// nSNPsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 1 -// nInsertionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 0 -// nDeletionsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 -// nNoCallsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 3 -// nHetsFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 1 -// nHomRefFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 2 -// nHomVarFail = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -v PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 3 - WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -125,25 +88,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9fafed19a700a7d4bd7aaed2dcad37be") + Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @Test public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { -// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 -// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep -c PASS = 8 -// 
nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 3 -// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5 -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3 -// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($5) > 1) print $0 }' | wc -l = 1 -// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 -// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4 -// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8 -// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10 -// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep NONCPG | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 - WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -158,7 +109,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("09ed51c3d5ac2099ded1d0e8cf8ee183") + Arrays.asList("677fe398643e62a10d6739d36a720a12") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ 
-179,7 +130,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9d233d3d8cec8e580acb98b1a2725b56") + Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -200,22 +151,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("850094f32657f04cb958891de4cfc5b2") + Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @Test public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { -// HG00513 -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 3 -// nInsertions = $ grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) == 1 && length($5) > 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' | wc -l = 1 -// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if (length($4) > 1 && length($5) == 1) print $0 }' | awk '{ print $10 }' | grep -v '0/0' | grep -v '\.\/\.' 
| wc -l = 0 -// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 2 -// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/1") print $0 }' | wc -l = 2 -// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "0/0") print $0 }' | wc -l = 3 -// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($10 ~ "1/1") print $0 }' | wc -l = 2 - WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -230,21 +172,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("cea071b8b0ebd8f138ba91375edf036e") + Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @Test public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7 -// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4 -// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3 -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && 
length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3 -// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9 -// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3 -// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5 -// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' 
= 4 WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -261,21 +195,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("e2f505046251e19b5737f4999c896fe2") + Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @Test public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $0 }' | wc -l = 7 -// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 == ".") print $0 }' | wc -l = 4 -// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && $6 != ".") print $0 }' | wc -l = 3 -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20 && length($5) == 1 && length($6) == 1 && $6 != ".") print $0 }' | wc -l = 3 -// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/0' = 9 -// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '0/1' = 3 
-// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '1/1' = 5 -// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk -F"[\t;]" '{ for (i = 1; i < NF; i++) if ($i ~ "DP=") print $i, $0 }' | sed 's/^DP=//' | awk '{ if ($1 < 20) print $11 "\n" $12 "\n" $13 }' | grep -c '\.\/\.' = 4 WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -294,26 +220,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("8b97fe8e5e75efe08c080bbf47960c8f") + Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @Test public void testFundamentalsCountVariantsNoCompRod() { -// nProcessedLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | awk '{ print length($4) }' | ~kiran/bin/SimpleStats = 38 -// nCalledLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep -c PASS = 9 -// nRefLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 == ".") print $0 }' | wc -l = 4 -// nVariantLoci = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != ".") print $0 }' | wc -l = 5 -// variantRate = nVariantLoci / nProcessedLoci = 0.131578947 -// nSNPs = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) == 1 && length($5) == 1) print $0 }' | wc -l = 3 -// nInsertions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." 
&& length($5) > 1) print $0 }' | wc -l = 1 -// nDeletions = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ if ($5 != "." && length($4) > 1) print $0 }' | wc -l = 1 -// nNoCalls = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "[[:punct:]]/[[:punct:]]") print $0 }' | wc -l = 4 -// nHets = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/1" || $i ~ "1/0") print $0 }' | wc -l = 8 -// nHomRef = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "0/0") print $0 }' | wc -l = 10 -// nHomVar = grep -v '#' FundamentalsTest.annotated.db.subset.snps_and_indels.vcf | grep PASS | awk '{ for (i = 10; i <= 12; i++) if ($i ~ "1/1") print $0 }' | wc -l = 5 - WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", @@ -326,7 +239,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("1687108ed96d1127b196c2d74cf80a49") + Arrays.asList("d44c8f44384189a09eea85a8e89d7299") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -336,84 +249,26 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("ca71324abf5659964c8f9e28b8fdbb28")); + 1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec")); executeTestParallel("testSelect1", spec); - //executeTest("testSelect1", spec); } } -// @Test -// public void testSelect2() { -// String extraArgs = "-L 1:1-10,000,000"; -// WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s", 
-// 1, Arrays.asList("")); -// //executeTestParallel("testSelect2", spec); -// executeTest("testSelect2", spec); -// } - @Test public void testVEGenotypeConcordance() { - String vcfFiles[] = {"GenotypeConcordanceEval.vcf", "GenotypeConcordanceEval.vcf.gz"}; - for (String vcfFile : vcfFiles) { - WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", - 1, - Arrays.asList("732d32997b19d9c4f0291287858c56d2")); - executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); - //executeTest("testVEGenotypeConcordance" + vcfFile, spec); - } + String vcfFile = "GenotypeConcordanceEval.vcf"; - } - - @Test - public void testVESimple() { - HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000 -ST CpG", "c74067360656519f769f805d6e1ef36b"); - expectations.put("-L 1:1-10,000,000 -ST CpG -family NA19238+NA19239=NA19240 -mvq 0 -EV MendelianViolationEvaluator", "0fe151e00ab35f3b95d4fea651592ec3"); - - for ( Map.Entry entry : expectations.entrySet() ) { - String extraArgs = entry.getKey(); - String md5 = entry.getValue(); - for (String tests : testsEnumerations) { - WalkerTestSpec spec = new WalkerTestSpec( tests + " " + extraArgs + " -o %s", - 1, // just one output file - Arrays.asList(md5)); - executeTestParallel("testVESimple", spec); - //executeTest("testVESimple", spec); - } - } - } - - @Test - public void testVEComplex() { - HashMap expectations = new HashMap(); - String extraArgs1 = "-L " + validationDataLocation + "chr1_b36_pilot3.interval_list -family NA19238+NA19239=NA19240 -mvq 30 -EV MendelianViolationEvaluator -ST CpG" + - " -B:dbsnp_130,dbSNP " + GATKDataLocation + "dbsnp_130_b36.rod" + - " -B:comp_hapmap,VCF3 " + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - - - expectations.put("", "700eba07bac9fba4ed963bbbdcab0e29"); - expectations.put(" -knownName comp_hapmap 
-knownName dbsnp", "c2464d5613072fb326ebffcf3078ae31"); - expectations.put(" -knownName comp_hapmap", "0912f35a4c4179ff93b152d8c4e009e2"); - for (String tests : testsEnumerations) { - for (Map.Entry entry : expectations.entrySet()) { - String extraArgs2 = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs1 + extraArgs2 + " -o %s", - 1, // just one output file - Arrays.asList(md5)); - executeTestParallel("testVEComplex", spec); - //executeTest("testVEComplex", spec); - } - } + WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", + 1, + Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); + executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); } @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("e68272d2f3b7a6439c4949cf0e34beeb")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); executeTestParallel("testCompVsEvalAC",spec); - //executeTest("testCompVsEvalAC",spec); } private static String withSelect(String cmd, String select, String name) { @@ -423,9 +278,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testTranches() { String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new 
WalkerTestSpec(extraArgs,1,Arrays.asList("4c5ef7c142427a85d1b9b1c9fe8fd3c2")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); executeTestParallel("testTranches",spec); - //executeTest("testTranches",spec); } @Test @@ -433,7 +287,6 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); executeTestParallel("testCompOverlap",spec); - //executeTest("testCompOverlap",spec); } @Test @@ -446,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -D " + dbsnp + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("923af856d04042c0ee9d01aa9eb8675a")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -460,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("45c08af95777e1eee3e9acbf136a8b6b")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ 
-520,57 +373,4 @@ public class VariantEvalIntegrationTest extends WalkerTest { ); executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); } - -// @Test -// public void testVEGenomicallyAnnotated() { -// String vecmd = "-T VariantEval" + -// " -R " + b36KGReference + -// " -L 21" + -// " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + -// " -EV CountFunctionalClasses -noEV" + -// " -B:eval,VCF " + validationDataLocation + "test.filtered.maf_annotated.vcf" + -// " -o %s"; -// String md5 = ""; -// -// WalkerTestSpec spec = new WalkerTestSpec(vecmd, 1, Arrays.asList(md5)); -// executeTestParallel("testVEGenomicallyAnnotated", spec); -// //executeTest("testVEGenomicallyAnnotated", spec); -// } -// -// @Test -// public void testVEWriteVCF() { -// String extraArgs = "-L 1:1-10,000,000 -NO_HEADER -family NA19238+NA19239=NA19240 -mvq 30 -EV MendelianViolationEvaluator"; -// for (String tests : testsEnumerations) { -// WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s -NO_HEADER", -// 2, -// Arrays.asList("50321436a65ef7d574286cb0a1c55f7e", "d4bdd06ed5cb1aff1dfee8b69d5d17b8")); -// executeTestParallel("testVEWriteVCF", spec); -// //executeTest("testVEWriteVCF", spec); -// } -// } -// -// @Test -// public void testVEValidatePass() { -// String extraArgs = "-L 1:1-10,000,000"; -// for (String tests : testsEnumerations) { -// WalkerTestSpec spec = new WalkerTestSpec(withValidateTiTv(withSelect(tests, "DP < 50", "DP50"), 1.0, 4.0) + " " + extraArgs + " -o %s", -// 1, Arrays.asList("8a0203f0533b628ad7f1f230a43f105f")); -// executeTestParallel("testVEValidatePass", spec); -// } -// } -// -// @Test -// public void testVEValidateFail() { -// String extraArgs = "-L 1:1-10,000,000"; -// for (String tests : testsEnumerations) { -// WalkerTestSpec spec = new WalkerTestSpec(withValidateTiTv(withSelect(tests, "DP < 50", "DP50"), 1.0, 1.2) + " " + extraArgs + " -o %s", -// 1, UserException.class); -// 
executeTestParallel("testVEValidateFail", spec); -// } -// } -// -// private static String withValidateTiTv(String cmd, double min, double max) { -// return String.format("%s -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio >= %2$s' -validate 'eval.comp_genotypes.all.called.all.titv.tiTvRatio <= %3$s'", cmd, min, max); -// } - }