From ac9dc0b4b4411c8d0983499690f26a3f72659c53 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 15 Apr 2010 19:53:02 +0000 Subject: [PATCH] Removing VariantEval (v1); everyone should be using VE2 now. Docs coming ASAP. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3177 348d0f76-0448-11de-a6fe-93d51630548a --- .../varianteval/BasicPoolVariantAnalysis.java | 59 --- .../varianteval/BasicVariantAnalysis.java | 69 --- .../varianteval/CallableBasesAnalysis.java | 103 ---- .../walkers/varianteval/ChipConcordance.java | 155 ------ .../varianteval/ClusterCounterAnalysis.java | 98 ---- .../walkers/varianteval/GenotypeAnalysis.java | 21 - .../varianteval/GenotypeConcordance.java | 63 --- .../varianteval/HardyWeinbergEquilibrium.java | 101 ---- .../gatk/walkers/varianteval/Histogram.java | 58 --- .../varianteval/IndelMetricsAnalysis.java | 64 --- .../varianteval/NeighborDistanceAnalysis.java | 95 ---- .../walkers/varianteval/PoolAnalysis.java | 12 - .../varianteval/PooledConcordance.java | 54 --- .../varianteval/PooledFrequencyAnalysis.java | 84 ---- .../varianteval/PopulationAnalysis.java | 21 - .../TransitionTranversionAnalysis.java | 47 -- .../varianteval/ValidationDataAnalysis.java | 60 --- .../walkers/varianteval/VariantAnalysis.java | 29 -- .../walkers/varianteval/VariantCounter.java | 81 ---- .../varianteval/VariantDBCoverage.java | 121 ----- .../varianteval/VariantEvalWalker.java | 441 ------------------ .../walkers/varianteval/VariantMatcher.java | 40 -- .../walkers}/ConcordanceTruthTable.java | 2 +- .../walkers/HapmapPoolAllelicInfoWalker.java | 1 - .../VariantEvalPerformanceTest.java | 222 --------- .../VariantEvalWalkerIntegrationTest.java | 221 --------- 26 files changed, 1 insertion(+), 2321 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicVariantAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CallableBasesAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ClusterCounterAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/Histogram.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelMetricsAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/NeighborDistanceAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PoolAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledConcordance.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledFrequencyAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PopulationAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TransitionTranversionAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationDataAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantAnalysis.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantCounter.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantMatcher.java rename java/src/org/broadinstitute/sting/{gatk/walkers/varianteval => oneoffprojects/walkers}/ConcordanceTruthTable.java (99%) delete mode 100644 java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalPerformanceTest.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java deleted file mode 100755 index 6806ace76..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicPoolVariantAnalysis.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.io.PrintStream; -import java.util.List; -import java.util.ArrayList; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Sep 3, 2009 - * Time: 5:04:40 PM - * To change this template use File | Settings | File Templates. - */ -public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{ - protected int numIndividualsInPool; - protected String name; - protected String filenames; - protected PrintStream out, callOut; - private VariantEvalWalker master; - - public BasicPoolVariantAnalysis(String name, int nIndividuals) { - this.name = name; - this.numIndividualsInPool = nIndividuals; - } - - public String getName() { return this.name; } - - public int getNumberOfIndividualsInPool() { return this.numIndividualsInPool; } - - public PrintStream getSummaryPrintStream() { return this.out; } - - public PrintStream getCallPrintStream() { return this.callOut; } - - public VariantEvalWalker getMaster() { return this.master; } - - public List getParams() { return new ArrayList(); } - - public List done() { return new ArrayList(); } - - public int getNumberOfAllelesInPool() { return 2*getNumberOfIndividualsInPool(); } - - public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filenames) { - this.master = master; - this.out = out; - this.callOut = callOut; - this.filenames = filenames; - } - - public void finalize(long nSites) { - // no need to finalize data in general - } - - public abstract String update(Variation variant, RefMetaDataTracker tracker, char ref, AlignmentContext context); - -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicVariantAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicVariantAnalysis.java deleted file mode 100755 index 4630948c7..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/BasicVariantAnalysis.java +++ /dev/null @@ -1,69 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public abstract class BasicVariantAnalysis implements VariantAnalysis { - protected String name; - protected PrintStream out, callOut; - private VariantEvalWalker master; - protected String filename; - - public BasicVariantAnalysis(String name) { - this.name = name; - } - - public VariantEvalWalker getMaster() { return master; } - - public String getName() { - return name; - } - - public List getParams() { - return new ArrayList(); - } - - public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename) { - this.master = master; - this.out = out; - this.callOut = callOut; - this.filename = filename; - } - - public PrintStream getSummaryPrintStream() { - return out; - } - - public PrintStream getCallPrintStream() { - return callOut; - } - - public List done() { - return new ArrayList(); - } - - /** - * No need to finalize the data in general - * @param nSites - */ - public void finalize(long nSites) { - - } - - public abstract String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CallableBasesAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CallableBasesAnalysis.java deleted file mode 100755 index ac848089e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CallableBasesAnalysis.java +++ /dev/null @@ -1,103 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -public class CallableBasesAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis { - //long all_bases = 0; - long all_calls = 0; - final static double[] thresholds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 50, 100}; - long[] discoverable_bases = new long[thresholds.length]; - long[] genotypable_bases = new long[thresholds.length]; - - public CallableBasesAnalysis() { - super("callable_bases"); - } - - public long nSites() { - return super.getMaster().getNMappedSites(); - } - - public long nCalls() { - return all_calls; - } - - public long nDiscoverable(int index) { - return discoverable_bases[index]; - } - - public double percentDiscoverable(int index) { - return (100.0 * nDiscoverable(index)) / nSites(); - } - - public long nGenotypable(int index) { - return genotypable_bases[index]; - } - - public double percentGenotypable(int index) { - return (100.0 * nGenotypable(index)) / nSites(); - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - //all_bases++; - - if (eval == null) // no data here! - return null; - - // we actually have a record here - if (!(eval instanceof VariantBackedByGenotype)) { // evaluation record isn't a genotype, die! - return null; - //throw new RuntimeException("Evaluation track isn't backed by a Genotype!"); - } - - all_calls++; - // For every threshold, updated discoverable and callable - for (int i = 0; i < thresholds.length; i++) { - double threshold = thresholds[i]; - - // update discoverable - if ( eval.getNegLog10PError() >= threshold ) - discoverable_bases[i]++; - - List genotypes = ((VariantBackedByGenotype)eval).getGenotypes(); - for ( Genotype g : genotypes ) { - if ( g.getNegLog10PError() >= threshold ) { - genotypable_bases[i]++; - break; - } - } - } - - return null; - } - - public List done() { - List s = new ArrayList(); - s.add(String.format("total_no_sites %d", nSites())); - s.add(String.format("total_no_calls %d", nCalls())); - s.add(String.format("")); - s.add(String.format("confidence_threshold\tdiscoverable_bases\tdiscoverable_bases_percent\tgenotypable_bases\tgenotypable_bases_percent")); - - for (int i = 0; i < thresholds.length; i++) { - double threshold = thresholds[i]; - s.add(String.format("%6.2f\t%d\t%.6f\t%d\t%.6f", threshold, nDiscoverable(i), percentDiscoverable(i), nGenotypable(i), percentGenotypable(i))); - } - - return s; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java deleted file mode 100755 index 6faab7c9d..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ChipConcordance.java +++ /dev/null @@ -1,155 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public abstract class ChipConcordance extends BasicVariantAnalysis { - - // the array of truth tables used to store data in this analysis - private ConcordanceTruthTable[] truthTables; - - // mapping from sample name to index into truthTables for the corresponding table - private Map sampleToArrayIndex; - - // list of all chip rods used - private ArrayList rodNames = new ArrayList(); - - public ChipConcordance(final String name, final String chipName, boolean chipNameIsFile) { - super(name); - - // read sample names from file if appropriate, otherwise use the chip name - if ( chipNameIsFile ) { - List sampleNames = readSampleNamesFromFile(chipName); - rodNames.addAll(sampleNames); - } else { - rodNames.add(chipName); - } - - // initialize the truth table storage data - sampleToArrayIndex = new HashMap(); - truthTables = createTruthTableMappings(rodNames, sampleToArrayIndex); - } - - private List readSampleNamesFromFile(String file) { - ArrayList samples = new ArrayList(); - - try { - BufferedReader reader = new BufferedReader(new FileReader(file)); - String line = reader.readLine(); - while ( line != null ) { - samples.add(line); - line = reader.readLine(); - } - } catch( FileNotFoundException e) { - throw new StingException("Chip file at "+file+" was not found. Please check filepath."); - } catch( IOException e) { - throw new StingException(e.getMessage()); - } - - return samples; - } - - // these methods need to be implemented by subclasses - protected abstract ConcordanceTruthTable[] createTruthTableMappings(List rodNames, Map sampleToArrayIndex); - protected abstract void assertVariationIsValid(Variation eval); - protected abstract HashMap makeGenotypeHash(List evals, List rodNames); - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - // get all of the chip rods at this locus - HashMap chips = new HashMap(); - for ( String name : rodNames ) { - Variation chip = tracker.lookup(name,Variation.class); - if ( chip != null ) { - // chips must be Genotypes - if ( !(chip instanceof VariantBackedByGenotype) ) - throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data"); - chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype()); - } - } - - // evaluate only if we have truth data or a call - return ( eval != null || chips.size() > 0 ) ? inc(chips, eval, ref) : null; - } - - public String inc(Map chips, Variation eval, char ref) { - // TODO -- needed to add this for now while we're moving over to VE2 - if ( !(eval instanceof VariantBackedByGenotype) ) - return null; - - // each implementing class can decide whether the Variation is valid - assertVariationIsValid(eval); - - // This shouldn't happen, but let's check anyways to be safe - if (BaseUtils.simpleBaseToBaseIndex(ref) == -1) - return null; - - // create a hash of samples to their Genotypes - List evals = (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList()); - HashMap evalHash = makeGenotypeHash(evals, rodNames); - - // make chip/eval Genotype pairs - List>[] chipEvals = new List[truthTables.length]; - for ( String name : rodNames ) { - Genotype chipG = chips.get(name); - Genotype evalG = evalHash.get(name); - - if (chipG == null && evalG == null) - continue; - - int index = sampleToArrayIndex.get(name); - if ( chipEvals[index] == null ) - chipEvals[index] = new ArrayList>(); - chipEvals[index].add(new Pair(chipG, evalG)); - } - - // now we can finally update our truth tables with the truth vs. calls data - StringBuilder s = new StringBuilder(); - for (int i = 0; i < truthTables.length; i++) { - if ( chipEvals[i] != null ) { - ConcordanceTruthTable table = truthTables[i]; - String x = table.addEntry(chipEvals[i], eval, ref); - if ( x != null ) s.append(x); - } - } - - String toReturn = s.toString(); - return toReturn.equals("") ? null : toReturn; - } - - public List done() { - List s = new ArrayList(); - - // let the truth tables do all the printing - for ( ConcordanceTruthTable table : truthTables ) { - table.addAllStats(s); - s.add(""); - } - - return s; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ClusterCounterAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ClusterCounterAnalysis.java deleted file mode 100755 index 9c208d874..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ClusterCounterAnalysis.java +++ /dev/null @@ -1,98 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.IntervalRod; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public class ClusterCounterAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - int minDistanceForFlagging = 5; - - /** - * Threshold distances between neighboring SNPs we will track and assess - */ - int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100}; - int[] variantsWithClusters = new int[neighborWiseBoundries.length]; - /** - * Keep track of the last variation we saw in the stream - */ - Variation lastVariation = null; - - /** - * The interval that the last variation occurred in. Don't think that SNPs from different intervals are - * too close together in hybrid selection. - */ - GenomeLoc lastVariantInterval = null; - int nSeen = 0; - - - - public ClusterCounterAnalysis() { - super("cluster_counter_analysis"); - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - String r = null; - - if ( eval != null && eval.isSNP() ) { - IntervalRod intervalROD = tracker.lookup("interval",IntervalRod.class); - GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation(); - - if (lastVariation != null) { - GenomeLoc eL = eval.getLocation(); - GenomeLoc lvL = lastVariation.getLocation(); - - // if we are on the same contig, and we in the same interval - if (eL.getContigIndex() == lvL.getContigIndex() && ! (lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0)) { - long d = eL.distance(lvL); - nSeen++; - StringBuilder s = new StringBuilder(); - for (int i = 0; i < neighborWiseBoundries.length; i++) { - int maxDist = neighborWiseBoundries[i]; - s.append(String.format("%d ", d <= maxDist ? maxDist : 0)); - if ( d <= maxDist ) { - variantsWithClusters[i]++; - } - } - - // lookup in master for performance reasons - if ( d <= minDistanceForFlagging && getMaster().includeViolations() ) - r = String.format("snp_within_cluster %d %s %s %s", d, eL, lvL, s.toString()); - } - } - - // eval is now the last variation - lastVariation = eval; - lastVariantInterval = interval; - } - - return r; - } - - public List done() { - List s = new ArrayList(); - s.add(String.format("snps_counted_for_neighbor_distances %d", nSeen)); - s.add(String.format("description maxDist count")); - for ( int i = 0; i < neighborWiseBoundries.length; i++ ) { - int maxDist = neighborWiseBoundries[i]; - int count = variantsWithClusters[i]; - s.add(String.format("snps_within_clusters_of_size %10d %10d", maxDist, count)); - } - - return s; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeAnalysis.java deleted file mode 100755 index f781551a8..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeAnalysis.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - * If an analysis implements this interface, it asserts that it performs a genotype based analysis, as - * opposed a straight variant analysis. The difference here is that variants are not asserted to be - * the actual genotype of a particular person, but are really just variation "out-there" in a population. - * A genotype analysis would be something like covered bases, confidently called bases, genotyping - * concordance, etc. - * - */ -public interface GenotypeAnalysis { - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java deleted file mode 100755 index b9fdfe738..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.*; - -import java.util.*; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -public class GenotypeConcordance extends ChipConcordance implements GenotypeAnalysis { - - private String providedChipName = null; - - public GenotypeConcordance(final String chipName, boolean chipNameIsFile) { - super("genotype_concordance", chipName, chipNameIsFile); - if ( !chipNameIsFile ) - providedChipName = chipName; - } - - protected void assertVariationIsValid(Variation eval) { - // must be a genotype - if ( eval != null && !(eval instanceof VariantBackedByGenotype) ) - throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); - } - - protected ConcordanceTruthTable[] createTruthTableMappings(List rodNames, Map sampleToArrayIndex) { - ConcordanceTruthTable[] tables = new ConcordanceTruthTable[rodNames.size()]; - - // each sample gets its own truth table - for (int i = 0; i < rodNames.size(); i++) { - String name = rodNames.get(i); - tables[i] = new ConcordanceTruthTable(name); - sampleToArrayIndex.put(name, i); - } - - return tables; - } - - protected HashMap makeGenotypeHash(List evals, List rodNames) { - HashMap hash = new HashMap(); - - // associate each Genotype with the appropriate sample - for ( Genotype eval : evals ) { - if ( providedChipName != null ) - hash.put(providedChipName, eval); - // TODO -- fix me in VE2 - //else if ( eval instanceof SampleBacked ) - // hash.put(((SampleBacked)eval).getSampleName(), eval); - else if ( rodNames.size() == 1 ) - hash.put(rodNames.get(0), eval); - else - throw new StingException("Genotype data has no associated samples but are multi-sample...?"); - } - return hash; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java deleted file mode 100755 index 197a16e77..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/HardyWeinbergEquilibrium.java +++ /dev/null @@ -1,101 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import cern.jet.math.Arithmetic; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.SNPCallFromGenotypes; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public class HardyWeinbergEquilibrium extends BasicVariantAnalysis implements PopulationAnalysis { - private double threshold; - int nSites = 0; - int nViolations = 0; - - HardyWeinbergEquilibrium(double threshold) { - super("hwe"); - this.threshold = threshold; - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - String r = null; - - if ( eval != null && - eval instanceof SNPCallFromGenotypes ) { - nSites++; - SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval; - //double pPoint = pointHWcalculate(call); - double pTailed = tailedHWcalculate(call); - double p = pTailed; - - //System.out.printf("HWE point=%.4e %s vs. tailed=%.4e %s for %s%n", pPoint, pPoint < threshold ? "***" : " ", pTailed, pTailed < threshold ? "***" : " ", call); - - if ( p < threshold ) { - r = String.format("HWE-violation %f < %f at %s", p, threshold, eval); - nViolations++; - } - } - - return r; - } - - public double tailedHWcalculate(SNPCallFromGenotypes call) { - int obsAA = call.nHomRefGenotypes(); - int obsAB = call.nHetGenotypes(); - int obsBB = call.nHomVarGenotypes(); - return HardyWeinbergCalculation.hwCalculate(obsAA, obsAB, obsBB); - } - - public double pointHWcalculate(SNPCallFromGenotypes call) { - int nAA = call.nHomRefGenotypes(); - int nAa = call.nHetGenotypes(); - int naa = call.nHomVarGenotypes(); - int nA = 2 * nAA + nAa; - int n = nAA + nAa + naa; - - // - // from Emigh 1980 - // - // P = pr[nAa | nA] = multinomial[n over nAA, nAa, naa] / binomial[2n over nA] * 2^nAa - // - // where nAA, nAa, naa are the observed numbers of the three genotypes, AA, Aa, and aa, - // respectively, and nA is the number of A alleles, where nA = 2nAA + nAa, and n is the number of alleles - // - int[] mXs = { nAA, nAa, naa }; // counts of each genotype as vector - double m = MathUtils.multinomial(mXs); - double b = Arithmetic.binomial(2 * n, nA); - double tosses = Math.pow(2, nAa); - double p = (m / b) * tosses; - - if ( false ) { - System.out.printf("HWE-violation at %s %f < %f %1.2f %5d %5d %5d %5d %5d %.2e %.2e %.2e => %.6e [%s]%n", - call.getLocation(), p, threshold, call.getNonRefAlleleFrequency(), nAA, nAa, naa, nA, n, m, b, tosses, p, call); - System.out.printf("(factorial(%d) / (factorial(%d) * factorial(%d) * factorial(%d))) / choose(%d, %d) * 2^%d - %f < 1e-3%n", - nAA + nAa + naa, nAA, nAa, naa, 2 * n, nA, nAa, p); - } - - return p; - } - - public List done() { - List s = new ArrayList(); - s.add(String.format("n_calls %d", nSites)); - s.add(String.format("n_violations %d", nViolations)); - s.add(String.format("violations_rate %.2f", (100.0*nViolations) / nSites)); - return s; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/Histogram.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/Histogram.java deleted file mode 100755 index 3bf07cb63..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/Histogram.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import java.util.ArrayList; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public class Histogram { - ArrayList data; - int nBins = 100; - double minValue, maxValue; - - public Histogram(int nBins, double minValue, double maxValue, T initialValue) { - data = new ArrayList(nBins); - this.nBins = nBins; - this.minValue = minValue; - this.maxValue = maxValue; - initialize(initialValue); - } - - public void initialize(T initialValue) { - data.clear(); - for ( int i = 0; i < nBins; i++ ) { - data.add(i, initialValue); - } - } - - public void setBin(int i, T value) { - data.set(i, value); - } - - public void setBin(double x, T value) { - setBin(x2bin(x), value); - } - - public T getBin(int binI) { - return data.get(binI); - } - - public T getBin(double x) { - return getBin(x2bin(x)); - } - - public int x2bin(double x) { - return (int)Math.floor((x - minValue) / ((maxValue - minValue) * nBins)); - } - - public double bin2x(int bin) { - return (maxValue - minValue) * ((bin + 0.5) / nBins) + minValue; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelMetricsAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelMetricsAnalysis.java deleted file mode 100755 index d3f76d3b3..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelMetricsAnalysis.java +++ /dev/null @@ -1,64 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - long insertions = 0; - long deletions = 0; - int maxSize = 0; - long[][] sizes = new long[2][100]; - - public IndelMetricsAnalysis() { - super("indel_metrics"); - for (int i = 0; i < 100; i++) - sizes[0][i] = sizes[1][i] = 0; - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - if (eval != null && eval.isInsertion()) { - if (eval.isInsertion()) - insertions++; - else if (eval.isDeletion()) - deletions++; - else - throw new RuntimeException("Variation is indel, but isn't insertion or deletion!"); - - for (String allele : eval.getAlleleList()) - if (allele.length() < 100) { - sizes[eval.isDeletion() ? 0 : 1][allele.length()]++; - if (allele.length() > maxSize) - maxSize = allele.length(); - } - } - - return null; - } - - public List done() { - List s = new ArrayList(); - s.add(String.format("total_deletions %d", deletions)); - s.add(String.format("total_insertions %d", insertions)); - s.add(String.format("")); - - s.add("Size Distribution"); - s.add("size\tdeletions\tinsertions"); - for (int i = 1; i <= maxSize; i++) - s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i])); - - return s; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/NeighborDistanceAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/NeighborDistanceAnalysis.java deleted file mode 100755 index a4deceee7..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/NeighborDistanceAnalysis.java +++ /dev/null @@ -1,95 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.IntervalRod; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public class NeighborDistanceAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - ArrayList neighborWiseDistances; - int minDistanceForFlagging = 5; - int[] neighborWiseBoundries = {1, 2, 5, 10, 20, 50, 100, 1000, 10000}; - - Variation lastVariation = null; - GenomeLoc lastVariantInterval = null; - PrintStream violationsOut = null; - - public NeighborDistanceAnalysis() { - super("neighbor_distances"); - neighborWiseDistances = new ArrayList(); - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - String r = null; - - if ( eval != null && eval.isSNP() ) { - IntervalRod intervalROD = tracker.lookup("interval",IntervalRod.class); - - GenomeLoc interval = intervalROD == null ? null : intervalROD.getLocation(); - - if (lastVariation != null) { - GenomeLoc eL = eval.getLocation(); - GenomeLoc lvL = lastVariation.getLocation(); - if (eL.getContigIndex() == lvL.getContigIndex()) { - long d = eL.distance(lvL); - if ( lastVariantInterval != null && lastVariantInterval.compareTo(interval) != 0) { - // we're on different intervals - //out.printf("# Excluding %d %s %s vs. %s %s%n", d, eL, interval, lvL, lastVariantInterval); - } else { - neighborWiseDistances.add(d); - r = d <= minDistanceForFlagging ? String.format("neighbor-distance %d %s %s", d, eL, lvL) : null; - } - } - } - - lastVariation = eval; - lastVariantInterval = interval; - } - - return r; - } - - public List done() { - int[] pairCounts = new int[neighborWiseBoundries.length]; - Arrays.fill(pairCounts, 0); - for ( long dist : neighborWiseDistances ) { - boolean done = false; - for ( int i = 0; i < neighborWiseBoundries.length && ! done ; i++ ) { - int maxDist = neighborWiseBoundries[i]; - if ( dist <= maxDist ) { - pairCounts[i]++; - done = true; - } - } - } - - List s = new ArrayList(); - s.add(String.format("snps_counted_for_neighbor_distances %d", neighborWiseDistances.size())); - int cumulative = 0; - s.add(String.format("description maxDist count cumulative")); - for ( int i = 0; i < neighborWiseBoundries.length; i++ ) { - int maxDist = neighborWiseBoundries[i]; - int count = pairCounts[i]; - cumulative += count; - s.add(String.format("snps_immediate_neighbors_within_bp %d %d %d", maxDist, count, cumulative)); - } - - return s; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PoolAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PoolAnalysis.java deleted file mode 100755 index fcf1d34cc..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PoolAnalysis.java +++ /dev/null @@ -1,12 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Sep 3, 2009 - * Time: 4:39:12 PM - * To change this template use File | Settings | File Templates. - */ -public interface PoolAnalysis extends PopulationAnalysis, GenotypeAnalysis { - -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledConcordance.java deleted file mode 100755 index 8a70ff0a2..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledConcordance.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.*; - -import java.util.*; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -public class PooledConcordance extends ChipConcordance implements PoolAnalysis { - - public PooledConcordance(final String chipName, boolean chipNameIsFile) { - super("pooled_concordance", chipName, chipNameIsFile); - } - - protected void assertVariationIsValid(Variation eval) { - // we only support VCF for now - if ( eval != null && !(eval instanceof RodVCF) ) - throw new StingException("Failure: we currently only support analyzing pooled data in VCF format"); - } - - protected ConcordanceTruthTable[] createTruthTableMappings(List rodNames, Map sampleToArrayIndex) { - // there is only one truth table - the pool-wide one - ConcordanceTruthTable[] tables = new ConcordanceTruthTable[1]; - tables[0] = new ConcordanceTruthTable(rodNames.size()); - - for ( String name : rodNames ) { - sampleToArrayIndex.put(name, 0); - } - - return tables; - } - - protected HashMap makeGenotypeHash(List evals, List rodNames) { - - HashMap hash = new HashMap(); - - // te Genotype is pool-wide so all samples are associated with it - if ( evals.size() > 0 ) { - for ( String name : rodNames ) - hash.put(name, evals.get(0)); - } - - return hash; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledFrequencyAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledFrequencyAnalysis.java deleted file mode 100644 index 79d38c8d0..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PooledFrequencyAnalysis.java +++ /dev/null @@ -1,84 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; - -import java.util.List; -import java.util.ArrayList; -import java.io.PrintStream; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Oct 20, 2009 - * Time: 4:03:23 PM - * To change this template use File | Settings | File Templates. - */ -public class PooledFrequencyAnalysis extends BasicPoolVariantAnalysis implements PoolAnalysis, GenotypeAnalysis { - /* - Maintains an array of basic variant analyses broken down by estimated frequency - */ - private VariantDBCoverage[] coverageAnalysisByFrequency; - private VariantCounter[] variantCounterByFrequency; - private TransitionTranversionAnalysis[] transitionTransversionByFrequency; - - public PooledFrequencyAnalysis(int poolSize, String knownDBSNPName ) { - super("Pooled_Frequency_Analysis",poolSize); - if ( poolSize > 0 ) { - coverageAnalysisByFrequency = new VariantDBCoverage[getNumberOfAllelesInPool()+1]; - variantCounterByFrequency = new VariantCounter[getNumberOfAllelesInPool()+1]; - transitionTransversionByFrequency = new TransitionTranversionAnalysis[getNumberOfAllelesInPool()+1]; - for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) { - coverageAnalysisByFrequency[j] = new VariantDBCoverage(knownDBSNPName); - variantCounterByFrequency[j] = new VariantCounter(); - transitionTransversionByFrequency[j] = new TransitionTranversionAnalysis(); - } - } - } - - public void initialize(VariantEvalWalker master, PrintStream out1, PrintStream out2, String name) { - super.initialize(master,out1,out2,name); - - } - - public String update( Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context ) { - int f = calculateFrequencyFromVariation(eval); - String interest1 = coverageAnalysisByFrequency[f].update(eval,tracker,ref,context); - String interest2 = variantCounterByFrequency[f].update(eval,tracker,ref,context); - String interest3 = transitionTransversionByFrequency[f].update(eval,tracker,ref,context); - - if ( interest1 != null ) { - return "coverageAnalysis_Frequency_"+f+"\t"+interest1; - } - - if ( interest2 != null ) { - return "VariantCounter_Frequency_"+f+"\t"+interest2; - } - - if ( interest3 != null ) { - return "transitionTransversion_Frequency_"+f+"\t"+interest3; - } - - return null; - } - - public int calculateFrequencyFromVariation(Variation eval) { - double freq = eval.getNonRefAlleleFrequency(); - return (int) Math.round(getNumberOfAllelesInPool()*freq); - } - - @Override - public List done() { - List s = new ArrayList(); - for ( int j = 0; j < getNumberOfAllelesInPool()+1; j ++ ) { - s.add("Pool_Frequency_Analysis_Frequency:\t"+j); - s.addAll(coverageAnalysisByFrequency[j].done()); - s.addAll(variantCounterByFrequency[j].done()); - s.addAll(transitionTransversionByFrequency[j].done()); - } - - return s; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PopulationAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PopulationAnalysis.java deleted file mode 100755 index 34da97596..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/PopulationAnalysis.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - * If an analysis implements this interface, it asserts that it performs a genotype based analysis, as - * opposed a straight variant analysis. The difference here is that variants are not asserted to be - * the actual genotype of a particular person, but are really just variation "out-there" in a population. - * A genotype analysis would be something like covered bases, confidently called bases, genotyping - * concordance, etc. - * - */ -public interface PopulationAnalysis { - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TransitionTranversionAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TransitionTranversionAnalysis.java deleted file mode 100755 index 624a012e8..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TransitionTranversionAnalysis.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -public class TransitionTranversionAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - long nTransitions = 0, nTransversions = 0; - - public TransitionTranversionAnalysis() { - super("transitions_transversions"); - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - if (eval != null && eval.isSNP()) { - char altBase = eval.getAlternativeBaseForSNP(); - BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType((byte)ref, (byte)altBase); - if (subType == BaseUtils.BaseSubstitutionType.TRANSITION) { - nTransitions++; - } else { - nTransversions++; - } - } - return null; - } - - public List done() { - List s = new ArrayList(); - s.add(String.format("transitions %d", nTransitions)); - s.add(String.format("transversions %d", nTransversions)); - s.add(String.format("ratio %.2f", nTransitions / (1.0 * nTransversions))); - return s; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationDataAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationDataAnalysis.java deleted file mode 100755 index 6eabe813b..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ValidationDataAnalysis.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * Created by IntelliJ IDEA. - * User: andrewk - * Date: Sep 30, 2009 - * Time: 5:28:57 PM - * To change this template use File | Settings | File Templates. - */ - -// @Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=VariationRod.class), @RMD(name=rodDbSNP.STANDARD_DBSNP_TRACK_NAME,type= rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class), @RMD(name="validation",type=RodGenotypeChipAsGFF.class)}) - -public class ValidationDataAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - private int calls_at_validated_sites = 0; - private int calls_at_sites_validated_true = 0; - private int validated_sites = 0; - - public ValidationDataAnalysis() { - super("validation_data_analysis"); - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - - validated_sites++; - List objects = tracker.getReferenceMetaData("validation"); - Object val_data = (objects.size() > 0) ? objects.get(0) : null; - - if (eval != null) { - calls_at_sites_validated_true++; - //out.format("Has validaiton data: %s\n", val_data.getLocation()); - if (val_data != null) { - //out.format("Validated true: %s\n", val_data.getLocation()); - calls_at_validated_sites++; - } - } - //out.println(context.getLocation()); - - return null; - } - - public List done() { - List s = new ArrayList(); - if (calls_at_validated_sites > 0) { // only output info if there were any validation sites encountered - s.add(String.format("validated sites %d", validated_sites)); - s.add(String.format("calls at validated sites %d", calls_at_validated_sites)); - s.add(String.format("calls at sites validated true %d", calls_at_sites_validated_true)); - s.add(String.format("%% validated true %f", (float) calls_at_validated_sites / calls_at_sites_validated_true)); - }else{ - s.add("No validation data encountered"); - } - return s; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantAnalysis.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantAnalysis.java deleted file mode 100755 index f06723823..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantAnalysis.java +++ /dev/null @@ -1,29 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.io.PrintStream; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public interface VariantAnalysis { - public String getName(); - public PrintStream getSummaryPrintStream(); - public PrintStream getCallPrintStream(); - public List getParams(); - public void initialize(VariantEvalWalker master, PrintStream out, PrintStream callOut, String filename); - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context); - public void finalize(long nSites); - public List done(); -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantCounter.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantCounter.java deleted file mode 100755 index 63019fdfc..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantCounter.java +++ /dev/null @@ -1,81 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - long nBasesCovered = 0; - int nSNPs = 0; - int nHets = 0; - - public VariantCounter() { - super("variant_counts"); - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - nSNPs += eval == null || eval.isReference() ? 0 : 1; - - // TODO -- break the het check out to a different module used only for single samples - - if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isBiallelic() && eval.isSNP() && eval instanceof VariantBackedByGenotype ) { - List genotypes = ((VariantBackedByGenotype)eval).getGenotypes(); - if ( genotypes.size() == 1 && genotypes.get(0).isHet() ) { - nHets++; - } - } - - return null; - } - - /** - * No need to finalize the data in general - * @param nSites - */ - public void finalize(long nSites) { - nBasesCovered = nSites; - } - - private double variantRate(int n) { - return n / (1.0 * Math.max(nBasesCovered, 1)); - } - - private long variantRateInverse(int n) { - return nBasesCovered / Math.max(n, 1); - } - - public List done() { - List s = new ArrayList(); - s.add(String.format("n bases covered %d", nBasesCovered)); - s.add(String.format("variants %d", nSNPs)); - s.add(String.format("variant rate %.5f confident variants per base", variantRate(nSNPs))); - s.add(String.format("variant rate 1 / %d confident variants per base", variantRateInverse(nSNPs))); - - if ( this.getMaster().evalContainsGenotypes ) { - s.add(String.format("heterozygotes %d", nHets)); - s.add(String.format("homozygotes %d", nSNPs - nHets)); - - s.add(String.format("heterozygosity %.5f confident hets per base", variantRate(nHets))); - s.add(String.format("heterozygosity 1 / %d confident hets per base", variantRateInverse(nHets))); - - s.add(String.format("het to hom ratio %.2f confident hets per confident homozygote non-refs [single sample genome-wide expectation is 2:1]", - ((double)nHets) / (Math.max(nSNPs - nHets, 1)))); - } - return s; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java deleted file mode 100755 index 64dd6e529..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantDBCoverage.java +++ /dev/null @@ -1,121 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.util.ArrayList; -import java.util.List; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ -public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - private String dbName; - private int nDBSNPs = 0; - private int nEvalObs = 0; - private int nSNPsAtdbSNPs = 0; - private int nConcordant = 0; - - public VariantDBCoverage(final String name) { - super("db_coverage"); - dbName = name; - } - - public int nDBSNPs() { return nDBSNPs; } - public int nEvalSites() { return nEvalObs; } - public int nSNPsAtdbSNPs() { return nSNPsAtdbSNPs; } - public int nConcordant() { return nConcordant; } - public int nNovelSites() { return Math.abs(nEvalSites() - nSNPsAtdbSNPs()); } - - public boolean discordantP(Variation dbSNP, Variation eval) { - if (eval != null) { - char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : Utils.stringToChar(eval.getReference()); - if (dbSNP != null && dbSNP.isSNP()) - return !dbSNP.getAlleleList().contains(String.valueOf(alt)); - } - return false; - } - - - /** - * What fraction of the evaluated site variants were also found in the db? - * - * @return - */ - public double dbSNPRate() { - return nSNPsAtdbSNPs() / (1.0 * nEvalSites()); - } - - public double concordanceRate() { - return nConcordant() / (1.0 * nSNPsAtdbSNPs()); - } - - public static Variation getFirstRealSNP(List dbsnpList) { - if (dbsnpList == null) - return null; - - Variation dbsnp = null; - for (Object d : dbsnpList) { - if (((Variation) d).isSNP() && (!(d instanceof RodVCF) || !((RodVCF) d).isFiltered())) { - dbsnp = (Variation) d; - break; - } - } - - return dbsnp; - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - Variation dbSNP = getFirstRealSNP(tracker.getReferenceMetaData( dbName, false )); - String result = null; - - if ( dbSNP != null ) nDBSNPs++; // count the number of real dbSNP events - if ( eval != null && eval.isSNP() ) { // ignore indels right now - nEvalObs++; // count the number of eval snps we've seen - - if (dbSNP != null) { // both eval and dbSNP have real snps - nSNPsAtdbSNPs++; - - if (!discordantP(dbSNP, eval)) // count whether we're concordant or not with the dbSNP value - nConcordant++; - else - result = String.format("Discordant [DBSNP %s] [EVAL %s]", dbSNP, eval); - } - } - -// if ( dbSNP != null && dbSNP.isSNP() ) { -// BrokenRODSimulator.attach("dbSNP"); -// rodDbSNP dbsnp = (rodDbSNP) BrokenRODSimulator.simulate_lookup("dbSNP", context.getLocation(), tracker); -// if ( ! dbSNP.getRS_ID().equals(dbsnp.getRS_ID()) && dbsnp.isSNP() ) { -// System.out.printf("Discordant site! %n%s%n vs.%n%s%n", dbSNP, dbsnp); -// } -// } - - return result; - } - - public List done() { - List s = new ArrayList(); - s.add(String.format("name %s", dbName)); - - s.add(String.format("n_db_snps %d", nDBSNPs())); - s.add(String.format("n_eval_sites %d", nEvalSites())); - s.add(String.format("n_overlapping_sites %d", nSNPsAtdbSNPs())); - s.add(String.format("n_concordant %d", nConcordant())); - s.add(String.format("n_novel_sites %d", nNovelSites())); - - s.add(String.format("%s_rate %.2f # percent eval snps at dbsnp snps", dbName.toLowerCase(), 100 * dbSNPRate())); - s.add(String.format("concordance_rate %.2f", 100 * concordanceRate())); - - return s; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java deleted file mode 100755 index 63c661a43..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ /dev/null @@ -1,441 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.PrintStream; -import java.util.*; -import java.util.regex.Pattern; - -/** - * A robust and general purpose tool for characterizing the quality of SNPs, Indels, and other variants that includes basic - * counting, ti/tv, dbSNP% (if -D is provided), concordance to chip or validation data, and will show interesting sites (-V) - * that are FNs, FP, etc. - */ -@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=ReferenceOrderedDatum.class)}) // right now we have no base variant class for rods, this should change -//@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=ReferenceOrderedDatum.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=ReferenceOrderedDatum.class), @RMD(name="interval",type=IntervalRod.class), @RMD(name="validation",type=RodGenotypeChipAsGFF.class)}) -//public class VariantEvalWalker extends RefWalker { -public class VariantEvalWalker extends RodWalker { - @Argument(shortName="minPhredConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false) - public double minConfidenceScore = -1.0; - - @Argument(shortName="printVariants", doc="If true, prints the variants in all of the variant tracks that are examined", required=false) - public boolean printVariants = false; - - @Argument(shortName="badHWEThreshold", doc="Only sites with deviations from Hardy-Weinberg equilibrium with P-values < than this threshold are flagged", required=false) - public double badHWEThreshold = 1e-3; - - @Argument(fullName="evalContainsGenotypes", shortName = "G", doc="If true, the input list of variants will be treated as a genotyping file, containing assertions of actual genotype values for a particular person. Analyses that only make sense on at the population level will be disabled, while those operating on genotypes will be enabled", required=false) - public boolean evalContainsGenotypes = false; - - @Argument(fullName="explode", shortName = "E", doc="Old style formatting, with each analysis split into separate files.", required=false) - public boolean explode = false; - - @Argument(fullName="includeViolations", shortName = "V", doc="If provided, violations will be written out along with summary information", required=false) - public boolean mIncludeViolations = false; - - @Argument(fullName="extensiveSubsets", shortName = "A", doc="If provided, output will be calculated over a lot of subsets, by default we only operate over all variants", required=false) - public boolean extensiveSubsets = false; - - @Argument(fullName="supressDateInformation", doc="This flag indicates that we want to suppress the date information from the output, so that if can be diff'ed against previous evals.", required=false) - public boolean supressDateInformation = false; - - @Argument(fullName="includeFilteredRecords", doc="If true, variation record with filter fields at are true will be included in the analysis", required=false) - public boolean includeFilteredRecords = false; - - - @Argument(fullName = "samplesFile", shortName="samples", doc="When running an analysis on one or more individuals with truth data, this field provides a filepath to the listing of which samples are used (and are used to name corresponding rods with -B)", required=false) - public String samplesFile = null; - - @Argument(fullName = "sampleName", shortName="sampleName", doc="When running an analysis on one or more individuals with truth data, provide this parameter to only analyze the genotype of this sample", required=false) - public String sampleName = null; - @Argument(fullName = "vcfInfoSelector", shortName="vcfInfoSelector", doc="When running an analysis on one or more individuals with truth data, provide this parameter to only analyze the genotype of this sample", required=false) - public String vcfInfoSelector = null; - - String analysisFilenameBase = null; - - final String knownSNPDBName = "dbSNP"; - final String One1KGSNPNames = "1kg"; - final String genotypeChipName = "hapmap-chip"; - - HashMap> analysisSets; - - PrintStream perLocusStream = null; - - long nMappedSites = 0; - - // the types of analysis we support, and the string tags we associate with the enumerated value - enum ANALYSIS_TYPE { - // todo -- differeniate into three classes -- snps at known snp sites, snps not at known snp site but covered by known indel, and novel - ALL_SNPS("all"), - SINGLETON_SNPS("singletons"), - TWOHIT_SNPS("2plus_hit"), - KNOWN_SNPS("known"), - SNPS_AT_NON_SNP_SITES("snp_at_known_non_snps"), - NOVEL_SNPS("novel"), - FILTERED_SNPS("filtered"); - - private final String value; - ANALYSIS_TYPE(String value) { this.value = value;} - - public String toString() { return value; } - - } - -// final ANALYSIS_TYPE[] POPULATION_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS, -// ANALYSIS_TYPE.SINGLETON_SNPS, -// ANALYSIS_TYPE.TWOHIT_SNPS, -// ANALYSIS_TYPE.KNOWN_SNPS, -// ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES, -// ANALYSIS_TYPE.NOVEL_SNPS}; -// final ANALYSIS_TYPE[] GENOTYPE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS, -// ANALYSIS_TYPE.KNOWN_SNPS, -// ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES, -// ANALYSIS_TYPE.NOVEL_SNPS}; - - final ANALYSIS_TYPE[] POPULATION_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS, - ANALYSIS_TYPE.SINGLETON_SNPS, - ANALYSIS_TYPE.TWOHIT_SNPS, - ANALYSIS_TYPE.KNOWN_SNPS, - ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES, - ANALYSIS_TYPE.NOVEL_SNPS, - ANALYSIS_TYPE.FILTERED_SNPS}; - final ANALYSIS_TYPE[] GENOTYPE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS, - ANALYSIS_TYPE.KNOWN_SNPS, - ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES, - ANALYSIS_TYPE.NOVEL_SNPS, - ANALYSIS_TYPE.FILTERED_SNPS}; - - final ANALYSIS_TYPE[] SIMPLE_ANALYSIS_NAMES = {ANALYSIS_TYPE.ALL_SNPS}; - ANALYSIS_TYPE[] ALL_ANALYSIS_NAMES = null; - - public void initialize() { - ALL_ANALYSIS_NAMES = SIMPLE_ANALYSIS_NAMES; - if (extensiveSubsets) - ALL_ANALYSIS_NAMES = evalContainsGenotypes ? GENOTYPE_ANALYSIS_NAMES : POPULATION_ANALYSIS_NAMES; - - // setup the path to the analysis - if (this.getToolkit().getArguments().outFileName != null) { - analysisFilenameBase = this.getToolkit().getArguments().outFileName + "."; // + ".analysis."; - } - - analysisSets = new HashMap>(); - for (ANALYSIS_TYPE setName : ALL_ANALYSIS_NAMES) { - analysisSets.put(setName, initializeAnalysisSet(setName)); - } - // THIS IS A HACK required in order to reproduce the behavior of old (and imperfect) RODIterator and - // hence to pass the integration test. The new iterator this code is now using does see ALL the SNPs, - // whether masked by overlapping indels/other events or not. - //TODO process correctly all the returned dbSNP rods at each location - //BrokenRODSimulator.attach("dbSNP"); - } - - - public long getNMappedSites() { - return nMappedSites; - } - - private ArrayList getAnalysisSet(final ANALYSIS_TYPE name) { - return analysisSets.containsKey(name) ? analysisSets.get(name) : null; - } - - private ArrayList initializeAnalysisSet(final ANALYSIS_TYPE setName) { - ArrayList analyses = new ArrayList(); - - // - // Add new analyses here! - // - analyses.add(new ValidationDataAnalysis()); - - analyses.add(new VariantCounter()); - analyses.add(new VariantDBCoverage(knownSNPDBName)); - analyses.add(new VariantDBCoverage(One1KGSNPNames)); - - if ( samplesFile != null ) { - //if ( numPeopleInPool < 1 ) - // analyses.add(new GenotypeConcordance(samplesFile, true)); - //else - analyses.add(new PooledConcordance(samplesFile, true)); - } else { - analyses.add(new GenotypeConcordance(genotypeChipName, false)); - } - - analyses.add(new TransitionTranversionAnalysis()); - analyses.add(new NeighborDistanceAnalysis()); - analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold)); - analyses.add(new ClusterCounterAnalysis()); - analyses.add(new CallableBasesAnalysis()); - analyses.add(new IndelMetricsAnalysis()); - - // - // Filter out analyses inappropriate for our evaluation type Population or Genotype - // - Iterator iter = analyses.iterator(); - while (iter.hasNext()) { - VariantAnalysis analysis = iter.next(); - VariantAnalysis removed = null; - if ( evalContainsGenotypes ) { - if ( ! (analysis instanceof GenotypeAnalysis ) ) { - removed = analysis; - iter.remove(); - } - } else if ( ! (analysis instanceof PopulationAnalysis || analysis instanceof PoolAnalysis) ) { - removed = analysis; - iter.remove(); -// } else if ( numPeopleInPool > 1 && ! ( analysis instanceof PoolAnalysis ) ) { - } else if ( analysis instanceof PoolAnalysis && samplesFile == null ) { - removed = analysis; - iter.remove(); - } - - if ( removed != null ) { - logger.info(String.format("Disabling analysis %s in set %s", removed, setName)); - } - } - - - if (printVariants) analyses.add(new VariantMatcher(knownSNPDBName)); - - for (VariantAnalysis analysis : analyses) { - initializeAnalysisOutputStream(setName, analysis); - } - - return analyses; - } - - /** - * Returns the filename of the analysis output file where output for an analysis with - * - * @param name - * @param params - * - * @return - */ - public String getAnalysisFilename(final String name, final List params) { - if (analysisFilenameBase == null) - return null; - else - return analysisFilenameBase + Utils.join(".", Utils.cons(name, params)); - } - - public void initializeAnalysisOutputStream(final ANALYSIS_TYPE setName, VariantAnalysis analysis) { - final String filename = getAnalysisFilename(setName + "." + analysis.getName(), analysis.getParams()); - - try { - if (perLocusStream == null) - perLocusStream = filename == null ? out : new PrintStream(new File(analysisFilenameBase + "interesting_sites")); - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } - - if (filename == null || !explode) - analysis.initialize(this, out, perLocusStream, filename); - else { - File file = new File(filename); - try { - analysis.initialize(this, new PrintStream(new FileOutputStream(file)), perLocusStream, filename); - } catch (FileNotFoundException e) { - throw new StingException("Couldn't open analysis output file " + filename, e); - } - } - } - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - //System.out.printf("Tracker at %s is %s, ref is %s, skip is %d mapped is %d%n", context.getLocation(), tracker, ref, context.getSkippedBases(), nMappedSites); - nMappedSites += context.getSkippedBases(); - - //System.out.printf("Tracker at %s is %s, ref is %s%n", context.getLocation(), tracker, ref); - //if ( ref == null ) - // out.printf("Last position was %s: skipping %d bases%n", - // context.getLocation(), context.getSkippedBases() ); - if ( ref == null ) { // we are seeing the last site - return 0; - } - - nMappedSites++; - - int nBoundGoodRods = tracker.getNBoundRodTracks("interval"); - if (nBoundGoodRods > 0) { - // System.out.printf("%s: n = %d%n", context.getLocation(), nBoundGoodRods ); - - // Iterate over each analysis, and update it - Variation eval = tracker.lookup("eval",Variation.class); - Variation evalForFilter = null; - - // ensure that the variation we're looking at is bi-allelic - if ( eval != null && ! eval.isBiallelic() ) - eval = null; - - if (eval != null) - if (eval.getNegLog10PError() * 10.0 < minConfidenceScore) eval = null; - - if ( eval != null && (eval instanceof RodVCF) ) { - if ( sampleName != null ) { - // code to grab a particular sample from a VCF - Variation evalOld = eval; -// System.out.printf("original is %s%n", evalOld); - eval = fakeVCFForSample((RodVCF)eval, ref, sampleName); - if ( eval != null && eval.isSNP() ) { -// System.out.printf("sample is %s%n", eval); -// System.out.printf("Replacing %s with %s%n", evalOld, eval); -// System.out.printf("%n"); - } else { - eval = null; - } - } - - if ( vcfInfoSelector != null && eval != null ) { - String[] keyValue = vcfInfoSelector.split("="); - Map map = ((RodVCF)eval).getRecord().getInfoValues(); - if ( map.containsKey(keyValue[0]) && ! Pattern.matches(keyValue[1], map.get(keyValue[0]) ) ) - eval = null; - } - - if ( eval != null && ((RodVCF)eval).mCurrentRecord.isFiltered() ) { - evalForFilter = eval; - if ( ! includeFilteredRecords ) { - eval = null; // we are not including filtered records, so set eval to null - } - //System.out.printf("Rejecting filtered record %s%n", eval); - } - } - - // update stats about all of the SNPs - updateAnalysisSet(ANALYSIS_TYPE.ALL_SNPS, eval, tracker, ref.getBase(), context); - - // update the known / novel set by checking whether the knownSNPDBName track has an entry here - if (eval != null) { - ANALYSIS_TYPE noveltySet = getNovelAnalysisType(tracker); - updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context); - } - - if (evalForFilter != null) { - updateAnalysisSet(ANALYSIS_TYPE.FILTERED_SNPS, evalForFilter, tracker, ref.getBase(), context); - } - - - // are we a population backed call? then update - if (eval instanceof SNPCallFromGenotypes) { - SNPCallFromGenotypes call = (SNPCallFromGenotypes) eval; - int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes(); - //System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls); - final ANALYSIS_TYPE s = nVarGenotypes == 1 ? ANALYSIS_TYPE.SINGLETON_SNPS : ANALYSIS_TYPE.TWOHIT_SNPS; - updateAnalysisSet(s, eval, tracker, ref.getBase(), context); - } - } - - return 1; - } - - private RodVCF fakeVCFForSample(RodVCF eval, ReferenceContext ref, final String sampleName) { - VCFGenotypeRecord genotype = (VCFGenotypeRecord)eval.getGenotype(sampleName); - if ( genotype.getNegLog10PError() > 0 ) { - VCFRecord record = new VCFRecord(Character.toString(ref.getBase()), ref.getLocus(), "GT"); - record.setAlternateBases(eval.getRecord().getAlternateAlleles()); - record.addGenotypeRecord(genotype); - record.setQual(10*genotype.getNegLog10PError()); - record.setFilterString(eval.getFilterString()); - record.addInfoFields(eval.getInfoValues()); - return new RodVCF("fakeVCFForSample", record, eval.getReader()); - } else { - return null; - } - } - - private ANALYSIS_TYPE getNovelAnalysisType(RefMetaDataTracker tracker) { - List dbsnpList = tracker.getReferenceMetaData("dbsnp"); - - if (dbsnpList.size() == 0) - return ANALYSIS_TYPE.NOVEL_SNPS; - - for (Object d : dbsnpList) { - if (((rodDbSNP) d).isSNP()) { - return ANALYSIS_TYPE.KNOWN_SNPS; - } - } - - return ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES; - - // old and busted way of doing this -// Variation dbsnp = (Variation) BrokenRODSimulator.simulate_lookup("dbSNP", ref.getLocus(), tracker); -// -// ANALYSIS_TYPE noveltySet = null; -// if ( dbsnp == null ) noveltySet = ANALYSIS_TYPE.NOVEL_SNPS; -// else if ( dbsnp.isSNP() ) noveltySet = ANALYSIS_TYPE.KNOWN_SNPS; -// else { // if ( dbsnp.isIndel() ) -// noveltySet = ANALYSIS_TYPE.SNPS_AT_NON_SNP_SITES; - } - - public boolean includeViolations() { return mIncludeViolations; } - - public void updateAnalysisSet(final ANALYSIS_TYPE analysisSetName, Variation eval, - RefMetaDataTracker tracker, char ref, AlignmentContext context) { - // Iterate over each analysis, and update it - ArrayList set = getAnalysisSet(analysisSetName); - if ( set != null ) { - for ( VariantAnalysis analysis : set ) { - String s = analysis.update(eval, tracker, ref, context); - if ( s != null && includeViolations() ) { - analysis.getCallPrintStream().println(getLineHeader(analysisSetName, "flagged", analysis.getName()) + s); - } - } - } - } - - // Given result of map function - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer value, Integer sum) { - return treeReduce(sum, value); - } - - public Integer treeReduce(Integer lhs, Integer rhs) { - return lhs + rhs; - } - - public void onTraversalDone(Integer result) { - for (ANALYSIS_TYPE analysisSetName : ALL_ANALYSIS_NAMES) { - printAnalysisSet(analysisSetName); - } - } - - private String getLineHeader(final ANALYSIS_TYPE analysisSetName, final String keyword, final String analysis) { - String s = Utils.join(",", Arrays.asList(analysisSetName, keyword, analysis)); - return s + Utils.dupString(' ', Math.max(50 - s.length(), 1)); - } - - private void printAnalysisSet(final ANALYSIS_TYPE analysisSetName) { - //out.printf("Writing analysis set %s", analysisSetName); - Date now = new Date(); - for (VariantAnalysis analysis : getAnalysisSet(analysisSetName)) { - String header = getLineHeader(analysisSetName, "summary", analysis.getName()); - analysis.finalize(getNMappedSites()); - PrintStream stream = analysis.getSummaryPrintStream(); - stream.printf("%s%s%n", header, Utils.dupString('-', 78)); - //stream.printf("%s Analysis set %s%n", analysisSetName, , analysisSetName); - stream.printf("%sAnalysis name %s%n", header, analysis.getName()); - stream.printf("%sAnalysis params %s%n", header, Utils.join(" ", analysis.getParams())); - stream.printf("%sAnalysis class %s%n", header, analysis.getClass().getName()); - if (!supressDateInformation) stream.printf("%sAnalysis time %s%n", header, now); - for (String line : analysis.done()) { - stream.printf("%s%s%n", header, line); - } - } - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantMatcher.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantMatcher.java deleted file mode 100755 index 19e2e2051..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantMatcher.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.Variation; - -/** - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ -public class VariantMatcher extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { - String dbName; - - public VariantMatcher(final String name) { - super("variant_matches"); - dbName = name; - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - String r = null; - - Variation db = tracker.lookup(dbName,Variation.class); - - if ( eval != null || db != null ) { - String matchFlag = " "; - if ( eval != null && db != null ) matchFlag = "*** "; - if ( eval == null && db != null ) matchFlag = ">>> "; - if ( eval != null && db == null ) matchFlag = "<<< "; - - r = String.format("%s %s: %s <=> EVAL: %s", dbName, matchFlag, db, eval); - } - return r; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ConcordanceTruthTable.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ConcordanceTruthTable.java similarity index 99% rename from java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ConcordanceTruthTable.java rename to java/src/org/broadinstitute/sting/oneoffprojects/walkers/ConcordanceTruthTable.java index 0d109c3b0..8f1b10598 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/ConcordanceTruthTable.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ConcordanceTruthTable.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; +package org.broadinstitute.sting.oneoffprojects.walkers; import org.broadinstitute.sting.utils.genotype.Genotype; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java index ad2cbeedc..39ca5f95a 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/HapmapPoolAllelicInfoWalker.java @@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.ConcordanceTruthTable; import org.broadinstitute.sting.playground.gatk.walkers.poolseq.PowerBelowFrequencyWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalPerformanceTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalPerformanceTest.java deleted file mode 100644 index ec057b7a2..000000000 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalPerformanceTest.java +++ /dev/null @@ -1,222 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.WalkerTest; -import org.junit.Test; - -import java.io.File; -import java.util.*; - - -/** - * - * @author aaron - * - * Class VariantEvalPerformanceTest - * - * TODO: remove me Eric - */ -public class VariantEvalPerformanceTest extends WalkerTest { - - @Test - public void testEvalVariantROD() { - HashMap md5 = new HashMap(); - md5.put("", "9cfda40f521d75a3e8bafc44a663c14a"); - md5.put("-A", "8fea7cc25f551ce170636fc35c5ae0fe"); - - /** - * the above MD5 was calculated from running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls - * - */ - for ( Map.Entry e : md5.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation " + e.getKey(), - 1, // just one output file - Arrays.asList(e.getValue())); - List result = executeTest("testEvalVariantROD", spec).getFirst(); - } - } - - @Test - public void testEvalVariantRODConfSix() { - List md5 = new ArrayList(); - md5.add("11d636d105f902680c46b9f2e330d922"); - - /** - * the above MD5 was calculated from running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls \ - * -minConfidenceScore 6 - */ - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation" + - " -minPhredConfidenceScore 60", - 1, // just one output file - md5); - List result = executeTest("testEvalVariantRODConfSixty", spec).getFirst(); - } - - @Test - public void testEvalVariantRODOutputViolations() { - List md5 = new ArrayList(); - md5.add("12ecd457d62329e9d4e593de904a457d"); - - /** - * the above MD5 was calculated from running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls \ - * --includeViolations - */ - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation" + - " --includeViolations", - 1, // just one output file - md5); - List result = executeTest("testEvalVariantRODOutputViolations", spec).getFirst(); - } - - @Test - public void testEvalGenotypeROD() { - List md5 = new ArrayList(); - md5.add("6ed44fd586c89dafd40cb8e0194dc456"); - /** - * the above MD5 was calculated after running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --evalContainsGenotypes \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls \ - * --rodBind hapmap-chip,GFF,/humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff - */ - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation" + - " --evalContainsGenotypes" + - " --rodBind hapmap-chip,GFF,/humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff", - 1, // just one output file - md5); - List result = executeTest("testEvalGenotypeROD", spec).getFirst(); - } - - @Test - public void testEvalMarksGenotypingExample() { - List md5 = new ArrayList(); - md5.add("c0396cfe89a63948aebbbae0a0e06678"); - /** - * Run with the following commands: - * - * java -Xmx2048m -jar /humgen/gsa-hphome1/depristo/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar - * -T VariantEval -R /broad/1KG/reference/human_b36_both.fasta -l INFO - * -B eval,Variants,/humgen/gsa-scr1/ebanks/concordanceForMark/UMichVsBroad.venn.set1Only.calls - * -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -hc /humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff - * -G -L 1 -o /humgen/gsa-scr1/ebanks/concordanceForMark/UMichVsBroad.venn.set1Only.calls.eval - */ - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " + - "-B eval,Variants," + validationDataLocation + "UMichVsBroad.venn.set1Only.calls " + - "-D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -hc /humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff " + - "-G " + - "--supressDateInformation " + - "-L 1:1-10,000,000 " + - "--outerr %s", - 1, // just one output file - md5); - List result = executeTest("testEvalMarksGenotypingExample", spec).getFirst(); - } - - @Test - public void testEvalRuntimeWithLotsOfIntervals() { - List md5 = new ArrayList(); - md5.add("6a90341517fc3c5026529301d9970c7b"); - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " + - "-B eval,Variants," + validationDataLocation + "NA12878.pilot_3.all.geli.calls " + - "-D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod " + - "--supressDateInformation " + - "-L /humgen/gsa-scr1/GATK_Data/thousand_genomes_alpha_redesign.targets.b36.interval_list " + - "--outerr %s", - 1, // just one output file - md5); - List result = executeTest("testEvalRuntimeWithLotsOfIntervals", spec).getFirst(); - } - - @Test - public void testVCFVariantEvals() { - HashMap md5 = new HashMap(); - md5.put("", "ee6b096169d6c5e2ce49d394fbec799b"); - md5.put("-A", "a443193c0810363f85278b1cfaed2fff"); - md5.put("-A --includeFilteredRecords", "812d7f2ecac28b1be7e7028af17df9c0"); - md5.put("-A --sampleName NA12878", "a443193c0810363f85278b1cfaed2fff"); - md5.put("-A -vcfInfoSelector AF=0.50", "afed4bf0c9f11b86f6e5356012f9cf2d"); - - for ( Map.Entry e : md5.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,VCF," + validationDataLocation + "NA12878.example1.vcf" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -hc /humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff" + - " -G" + - " -L 1:1-10,000" + - " --outerr %s" + - " --supressDateInformation " + e.getKey(), - 1, // just one output file - Arrays.asList(e.getValue())); - List result = executeTest("testVCFVariantEvals", spec).getFirst(); - } - } - - -} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java deleted file mode 100644 index ab33b4307..000000000 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java +++ /dev/null @@ -1,221 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.WalkerTest; -import org.junit.Test; - -import java.io.File; -import java.util.*; - -/** - * @author aaron - *

- * Class VariantEvalWalkerTest - *

- * test out the variant eval walker under different runtime conditions. - */ -public class VariantEvalWalkerIntegrationTest extends WalkerTest { - - @Test - public void testEvalVariantROD() { - HashMap md5 = new HashMap(); - md5.put("", "9cfda40f521d75a3e8bafc44a663c14a"); - md5.put("-A", "8fea7cc25f551ce170636fc35c5ae0fe"); - - /** - * the above MD5 was calculated from running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls - * - */ - for ( Map.Entry e : md5.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation " + e.getKey(), - 1, // just one output file - Arrays.asList(e.getValue())); - List result = executeTest("testEvalVariantROD", spec).getFirst(); - } - } - - @Test - public void testEvalVariantRODConfSix() { - List md5 = new ArrayList(); - md5.add("11d636d105f902680c46b9f2e330d922"); - - /** - * the above MD5 was calculated from running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls \ - * -minConfidenceScore 6 - */ - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation" + - " -minPhredConfidenceScore 60", - 1, // just one output file - md5); - List result = executeTest("testEvalVariantRODConfSixty", spec).getFirst(); - } - - @Test - public void testEvalVariantRODOutputViolations() { - List md5 = new ArrayList(); - md5.add("12ecd457d62329e9d4e593de904a457d"); - - /** - * the above MD5 was calculated from running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls \ - * --includeViolations - */ - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation" + - " --includeViolations", - 1, // just one output file - md5); - List result = executeTest("testEvalVariantRODOutputViolations", spec).getFirst(); - } - - @Test - public void testEvalGenotypeROD() { - List md5 = new ArrayList(); - md5.add("6ed44fd586c89dafd40cb8e0194dc456"); - /** - * the above MD5 was calculated after running the following command: - * - * java -jar ./dist/GenomeAnalysisTK.jar \ - * -R /broad/1KG/reference/human_b36_both.fasta \ - * -T VariantEval \ - * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ - * -L 1:10,000,000-11,000,000 \ - * --outerr myVariantEval \ - * --supressDateInformation \ - * --evalContainsGenotypes \ - * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls \ - * --rodBind hapmap-chip,GFF,/humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff - */ - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,Variants," + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -L 1:10,000,000-11,000,000" + - " --outerr %s" + - " --supressDateInformation" + - " --evalContainsGenotypes" + - " --rodBind hapmap-chip,GFF,/humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff", - 1, // just one output file - md5); - List result = executeTest("testEvalGenotypeROD", spec).getFirst(); - } - - @Test - public void testEvalMarksGenotypingExample() { - List md5 = new ArrayList(); - md5.add("c0396cfe89a63948aebbbae0a0e06678"); - /** - * Run with the following commands: - * - * java -Xmx2048m -jar /humgen/gsa-hphome1/depristo/dev/GenomeAnalysisTK/trunk/dist/GenomeAnalysisTK.jar - * -T VariantEval -R /broad/1KG/reference/human_b36_both.fasta -l INFO - * -B eval,Variants,/humgen/gsa-scr1/ebanks/concordanceForMark/UMichVsBroad.venn.set1Only.calls - * -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -hc /humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff - * -G -L 1 -o /humgen/gsa-scr1/ebanks/concordanceForMark/UMichVsBroad.venn.set1Only.calls.eval - */ - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " + - "-B eval,Variants," + validationDataLocation + "UMichVsBroad.venn.set1Only.calls " + - "-D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -hc /humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff " + - "-G " + - "--supressDateInformation " + - "-L 1:1-10,000,000 " + - "--outerr %s", - 1, // just one output file - md5); - List result = executeTest("testEvalMarksGenotypingExample", spec).getFirst(); - } - - @Test - public void testEvalRuntimeWithLotsOfIntervals() { - List md5 = new ArrayList(); - md5.add("6a90341517fc3c5026529301d9970c7b"); - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T VariantEval -R " + oneKGLocation + "reference/human_b36_both.fasta " + - "-B eval,Variants," + validationDataLocation + "NA12878.pilot_3.all.geli.calls " + - "-D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod " + - "--supressDateInformation " + - "-L /humgen/gsa-scr1/GATK_Data/thousand_genomes_alpha_redesign.targets.b36.interval_list " + - "--outerr %s", - 1, // just one output file - md5); - List result = executeTest("testEvalRuntimeWithLotsOfIntervals", spec).getFirst(); - } - - @Test - public void testVCFVariantEvals() { - HashMap md5 = new HashMap(); - md5.put("", "ee6b096169d6c5e2ce49d394fbec799b"); - md5.put("-A", "a443193c0810363f85278b1cfaed2fff"); - md5.put("-A --includeFilteredRecords", "812d7f2ecac28b1be7e7028af17df9c0"); - md5.put("-A --sampleName NA12878", "a443193c0810363f85278b1cfaed2fff"); - md5.put("-A -vcfInfoSelector AF=0.50", "afed4bf0c9f11b86f6e5356012f9cf2d"); - - for ( Map.Entry e : md5.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " --rodBind eval,VCF," + validationDataLocation + "NA12878.example1.vcf" + - " -T VariantEval" + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + - " -hc /humgen/gsa-scr1/GATK_Data/1KG_gffs/NA12878.1kg.gff" + - " -G" + - " -L 1:1-10,000" + - " --outerr %s" + - " --supressDateInformation " + e.getKey(), - 1, // just one output file - Arrays.asList(e.getValue())); - List result = executeTest("testVCFVariantEvals", spec).getFirst(); - } - } - - -} -