From af6d0003f8abe34dd606654b6bc473c198c7c1c6 Mon Sep 17 00:00:00 2001 From: ebanks Date: Sun, 1 Nov 2009 05:35:47 +0000 Subject: [PATCH] -Generalized the GenotypeConcordance module to deal with any number of individuals (although it will default to its old behavior if the -samples argument is left out). -Make rods return the appropriate type of Genotype calls from getGenotype(). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1954 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/RodGeliText.java | 6 +- .../sting/gatk/refdata/RodVCF.java | 11 +- .../varianteval/GenotypeConcordance.java | 280 +++++++++++------- .../PooledGenotypeConcordance.java | 2 +- .../varianteval/VariantEvalWalker.java | 8 +- .../utils/genotype/geli/GeliGenotypeCall.java | 30 +- .../utils/genotype/vcf/VCFGenotypeCall.java | 30 +- .../VariantEvalWalkerIntegrationTest.java | 4 +- 8 files changed, 240 insertions(+), 131 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java index 1630300aa..9fa1a2bf7 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGeliText.java @@ -28,10 +28,10 @@ package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeCall; import java.io.IOException; import java.util.ArrayList; @@ -366,7 +366,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation */ @Override public Genotype getCalledGenotype() { - return new BasicGenotype(getLocation(), bestGenotype, refBase, lodBtnb); + return new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb); } /** @@ -377,7 +377,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation @Override public List getGenotypes() { List ret = new ArrayList(); - ret.add(new BasicGenotype(getLocation(), bestGenotype, refBase, lodBtnb)); + ret.add(new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb)); return ret; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 9299f2a7b..6026a83f0 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -8,10 +8,7 @@ import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import org.broadinstitute.sting.utils.genotype.vcf.*; import java.io.File; import java.io.FileNotFoundException; @@ -291,7 +288,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, qual = refQual; else if (lst.get(0).getFields().containsKey("GQ")) qual = Double.valueOf(lst.get(0).getFields().get("GQ")) / 10.0; - return new BasicGenotype(this.getLocation(), Utils.join("", lst.get(0).getAlleles()), this.getReference().charAt(0), qual); + return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", lst.get(0).getAlleles()), qual, lst.get(0).getSampleName()); } return null; } @@ -315,7 +312,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, qual = refQual; else if (rec.getFields().containsKey("GQ")) qual = Double.valueOf(rec.getFields().get("GQ")) / 10.0; - genotypes.add(new BasicGenotype(this.getLocation(), Utils.join("", rec.getAlleles()), this.getReference().charAt(0), qual)); + genotypes.add(new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", rec.getAlleles()), qual, rec.getSampleName())); } return genotypes; } @@ -335,7 +332,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, qual = this.getNegLog10PError(); else if (record.getFields().containsKey("GQ")) qual = Double.valueOf(record.getFields().get("GQ")) / 10.0; - return new BasicGenotype(this.getLocation(), Utils.join("", record.getAlleles()), this.getReference().charAt(0), qual); + return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", record.getAlleles()), qual, record.getSampleName()); } } return null; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java index 4f43c4bfe..853df213f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java @@ -2,14 +2,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.RODRecordList; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; -import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.*; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * The Broad Institute @@ -21,7 +20,6 @@ import java.util.List; * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis { - private String dbName; private static final int REF = 0; private static final int VAR_HET = 1; @@ -31,138 +29,109 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"}; private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"}; - private int[][] table = new int[4][4]; - private int[] truth_totals = new int[4]; - private int[] calls_totals = new int[4]; + private ArrayList rodNames = new ArrayList(); + private HashMap tables = new HashMap(); public GenotypeConcordance(final String name) { super("genotype_concordance"); - dbName = name; - for (int i = 0; i < 4; i++) { - truth_totals[i] = 0; - calls_totals[i] = 0; - for (int j = 0; j < 4; j++) - table[i][j] = 0; - } + rodNames.add(name); + initialize(); } - public void inc(Variation chip, Variation eval, char ref) { - if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype)))) + public GenotypeConcordance(final List names) { + super("genotype_concordance"); + rodNames.addAll(names); + initialize(); + } + + private void initialize() { + for ( String name : rodNames ) + tables.put(name, new TruthTable()); + } + + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + // get all of the chip rods at this locus + HashMap chips = new HashMap(); + for ( String name : rodNames ) { + RODRecordList rods = tracker.getTrackData(name, null); + Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0)); + if ( chip != null ) { + if ( !(chip instanceof VariantBackedByGenotype) ) + throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data"); + chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype()); + } + } + + if ( eval != null && !(eval instanceof VariantBackedByGenotype) ) throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); + // don't procede if we have no truth data and no call + if ( eval != null || chips.size() > 0 ) + inc(chips, (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList()), ref); + return null; + } + + public void inc(Map chips, List evals, char ref) { + // This shouldn't happen, but let's check anyways if (BaseUtils.simpleBaseToBaseIndex(ref) == -1) return; - // get the genotyping data - Genotype chipGenotype = null; - Genotype evalGenotype = null; - if (chip != null) chipGenotype = ((VariantBackedByGenotype)chip).getCalledGenotype(); - if (eval != null) evalGenotype = ((VariantBackedByGenotype)eval).getCalledGenotype(); + HashMap evalHash = makeGenotypeHash(evals); - int truthIndex, callIndex; - if (chip == null) - truthIndex = UNKNOWN; - else if (!chipGenotype.isVariant(ref)) - truthIndex = REF; - else if (chipGenotype.isHet()) - truthIndex = VAR_HET; - else - truthIndex = VAR_HOM; + for ( String name : rodNames ) { + Genotype chip = chips.get(name); + Genotype eval = evalHash.get(name); - if (eval == null) - callIndex = NO_CALL; - else if (!evalGenotype.isVariant(ref)) - callIndex = REF; - else if (evalGenotype.isHet()) - callIndex = VAR_HET; - else - callIndex = VAR_HOM; + if (chip == null && eval == null) + continue; + + int truthType = getGenotypeType(chip, ref); + int callType = getGenotypeType(eval, ref); + TruthTable table = tables.get(name); - if (chip != null || eval != null) { //System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval); - table[truthIndex][callIndex]++; - truth_totals[truthIndex]++; - if (callIndex != NO_CALL) calls_totals[callIndex]++; + table.addEntry(truthType, callType); } } - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - Variation chip = (Variation) tracker.lookup(dbName, null); - if (eval != null || chip != null) - inc(chip, eval, ref); - return null; - } - - private void addCalledGenotypeConcordance(List s) { - StringBuilder sb = new StringBuilder(); - sb.append("CALLED_GENOTYPE_CONCORDANCE\t"); - for (int i = 0; i < 4; i++) { - int nConcordantCallsI = table[i][i]; - String value = "N/A"; - if (i != UNKNOWN) - value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i])); - sb.append(value); + private HashMap makeGenotypeHash(List evals) { + HashMap hash = new HashMap(); + for ( Genotype eval : evals ) { + if ( eval instanceof SampleBacked ) + hash.put(((SampleBacked)eval).getSampleName(), eval); + else if ( rodNames.size() == 1 ) + hash.put(rodNames.get(0), eval); + else + throw new StingException("Genotype data has no associated samples but are multi-sample...?"); } - s.add(sb.toString()); + return hash; } - /** How many overall calls where made that aren't NO_CALLS or UNKNOWNS? */ - private int getNCalled() { - int n = 0; - for (int i = 0; i < 4; i++) - for (int j = 0; j < 4; j++) - if (i != NO_CALL && j != NO_CALL) n += table[i][j]; - return n; - } + private static int getGenotypeType(Genotype g, char ref) { + int type; - private void addOverallStats(List s) { - int nConcordantRefCalls = table[REF][REF]; - int nConcordantHetCalls = table[VAR_HET][VAR_HET]; - int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM]; - int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM]; - int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls; - int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls; - int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM]; - int nCalled = getNCalled(); - s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar))); - s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls))); - s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled))); + if ( g == null ) + type = NO_CALL; + else if ( !g.isVariant(ref) ) + type = REF; + else if ( g.isHet() ) + type = VAR_HET; + else + type = VAR_HOM; + + return type; } public List done() { List s = new ArrayList(); - s.add(String.format("name %s", dbName)); - s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY")); - for (int i = 0; i < 4; i++) { - StringBuffer sb = new StringBuffer(); - sb.append(String.format("%15s ", TRUTH_NAMES[i])); - for (int j = 0; j < 4; j++) - sb.append(String.format("%9d ", table[i][j])); - sb.append(String.format("%9d ", truth_totals[i])); - if (i == VAR_HET || i == VAR_HOM) { - sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); - sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); - } else { - sb.append("\tN/A\t\t\tN/A"); - } - s.add(sb.toString()); + + for ( String name : rodNames ) { + TruthTable table = tables.get(name); + table.addAllStats(s, name); + s.add(""); } - addCalledGenotypeConcordance(s); - addOverallStats(s); - - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j])); - s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i]))); - s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j]))); - } - if (i == VAR_HET || i == VAR_HOM) { - s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); - s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); - } - } return s; } @@ -173,4 +142,95 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp sb.append("%"); return sb.toString(); } + + private class TruthTable { + int[][] table = new int[4][4]; + int[] truth_totals = new int[4]; + int[] calls_totals = new int[4]; + + public TruthTable() { + for (int i = 0; i < 4; i++) { + truth_totals[i] = 0; + calls_totals[i] = 0; + for (int j = 0; j < 4; j++) + table[i][j] = 0; + } + } + + public void addEntry(int truthIndex, int callIndex) { + table[truthIndex][callIndex]++; + truth_totals[truthIndex]++; + calls_totals[callIndex]++; + } + + public void addAllStats(List s, String name) { + s.add(String.format("name %s", name)); + s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY")); + for (int i = 0; i < 4; i++) { + StringBuffer sb = new StringBuffer(); + sb.append(String.format("%15s ", TRUTH_NAMES[i])); + for (int j = 0; j < 4; j++) + sb.append(String.format("%9d ", table[i][j])); + sb.append(String.format("%9d ", truth_totals[i])); + if (i == VAR_HET || i == VAR_HOM) { + sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); + sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); + } else { + sb.append("\tN/A\t\t\tN/A"); + } + s.add(sb.toString()); + } + + addCalledGenotypeConcordance(s); + addOverallStats(s); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j])); + s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i]))); + s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j]))); + } + if (i == VAR_HET || i == VAR_HOM) { + s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); + s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); + } + } + } + + private void addCalledGenotypeConcordance(List s) { + StringBuilder sb = new StringBuilder(); + sb.append("CALLED_GENOTYPE_CONCORDANCE\t"); + for (int i = 0; i < 4; i++) { + int nConcordantCallsI = table[i][i]; + String value = "N/A"; + if (i != UNKNOWN) + value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i])); + sb.append(value); + } + s.add(sb.toString()); + } + + // How many overall calls where made that aren't NO_CALLS or UNKNOWNS? + private int getNCalled() { + int n = 0; + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + if (i != NO_CALL && j != NO_CALL) n += table[i][j]; + return n; + } + + private void addOverallStats(List s) { + int nConcordantRefCalls = table[REF][REF]; + int nConcordantHetCalls = table[VAR_HET][VAR_HET]; + int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM]; + int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM]; + int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls; + int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls; + int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM]; + int nCalled = getNCalled(); + s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar))); + s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls))); + s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled))); + } + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java index 3b4d4210b..009f22251 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java @@ -17,7 +17,7 @@ import java.util.*; * To change this template use File | Settings | File Templates. */ -public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis { +public class PooledGenotypeConcordance extends BasicVariantAnalysis implements PoolAnalysis, GenotypeAnalysis { private PooledConcordanceTable table; private String[] hapmapNames; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 663670410..9e61a53c0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -56,8 +56,8 @@ public class VariantEvalWalker extends RodWalker { @Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false) public int numPeopleInPool = -1; - @Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false) - public String pathToHapmapPoolFile = null; + @Argument(fullName = "samplesFile", shortName="samples", doc="When running an analysis on a number of individuals with truth data, this field provides a filepath to the listing of which samples are used (and are used to name corresponding rods with -B)", required=false) + public String samplesFile = null; String analysisFilenameBase = null; @@ -129,7 +129,7 @@ public class VariantEvalWalker extends RodWalker { // Add new analyses here! // analyses.add(new ValidationDataAnalysis()); - analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile)); + analyses.add(new PooledGenotypeConcordance(samplesFile)); analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName)); analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName)); @@ -149,7 +149,7 @@ public class VariantEvalWalker extends RodWalker { VariantAnalysis analysis = iter.next(); boolean disableForGenotyping = evalContainsGenotypes && !(analysis instanceof GenotypeAnalysis); boolean disableForPopulation = !evalContainsGenotypes && !(analysis instanceof PopulationAnalysis); - boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance) || (numPeopleInPool < 1 && analysis instanceof PoolAnalysis); + boolean disableForPools = numPeopleInPool < 1 && analysis instanceof PoolAnalysis; boolean disable = disableForGenotyping | disableForPopulation | disableForPools; String causeName = disableForGenotyping ? "population" : (disableForPopulation ? "genotype" : (disableForPools ? "pool" : null)); if (disable) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index d4422378f..150fdf4e7 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -1,8 +1,7 @@ package org.broadinstitute.sting.utils.genotype.geli; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; import java.util.List; @@ -44,6 +43,33 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked mReads = new ArrayList(); } + public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) { + mRefBase = ref; + mLocation = loc; + mBestGenotype = DiploidGenotype.valueOf(genotype); + mRefGenotype = DiploidGenotype.createHomGenotype(ref); + mNextGenotype = mRefGenotype; + + // set general posteriors to min double value + mPosteriors = new double[10]; + Arrays.fill(mPosteriors, Double.MIN_VALUE); + + // set the ref to PError + mPosteriors[mRefGenotype.ordinal()] = -1.0 * negLog10PError; + + // set the best genotype to zero (need to do this after ref in case ref=best) + mPosteriors[mBestGenotype.ordinal()] = 0.0; + + // choose a smart next best genotype and set it to PError + if ( mBestGenotype == mRefGenotype ) + mNextGenotype = DiploidGenotype.valueOf(BaseUtils.simpleComplement(genotype)); + else + mNextGenotype = mRefGenotype; + mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError; + + mReads = new ArrayList(); + } + public void setReads(List reads) { mReads = new ArrayList(reads); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index 0c6fc91fd..e3d837f26 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -1,8 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; import java.util.Arrays; @@ -48,6 +47,33 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, mReads = new ArrayList(); } + public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, String sample) { + mRefBase = ref; + mLocation = loc; + mBestGenotype = DiploidGenotype.valueOf(genotype); + mRefGenotype = DiploidGenotype.createHomGenotype(ref); + mSampleName = sample; + + // set general posteriors to min double value + mPosteriors = new double[10]; + Arrays.fill(mPosteriors, Double.MIN_VALUE); + + // set the ref to PError + mPosteriors[mRefGenotype.ordinal()] = -1.0 * negLog10PError; + + // set the best genotype to zero (need to do this after ref in case ref=best) + mPosteriors[mBestGenotype.ordinal()] = 0.0; + + // choose a smart next best genotype and set it to PError + if ( mBestGenotype == mRefGenotype ) + mNextGenotype = DiploidGenotype.valueOf(BaseUtils.simpleComplement(genotype)); + else + mNextGenotype = mRefGenotype; + mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError; + + mReads = new ArrayList(); + } + public void setReads(List reads) { mReads = new ArrayList(reads); } diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java index fe742ec84..316c5c98d 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalkerIntegrationTest.java @@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalGenotypeROD() { List md5 = new ArrayList(); - md5.add("46c381dad05310267cdfd409228d3692"); + md5.add("87ec21995ab4227404da1dc144c1f4c7"); /** * the above MD5 was calculated after running the following command: * @@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest { @Test public void testEvalMarksGenotypingExample() { List md5 = new ArrayList(); - md5.add("cb914e7b65e6561685cccf0cd2cc5dfb"); + md5.add("e9aebff0e3ccba7ba6f64433ba7d3533"); /** * Run with the following commands: *