-Generalized the GenotypeConcordance module to deal with any number of individuals (although it will default to its old behavior if the -samples argument is left out).
-Make rods return the appropriate type of Genotype calls from getGenotype(). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1954 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b95165e39c
commit
af6d0003f8
|
|
@ -28,10 +28,10 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeCall;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -366,7 +366,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
*/
|
||||
@Override
|
||||
public Genotype getCalledGenotype() {
|
||||
return new BasicGenotype(getLocation(), bestGenotype, refBase, lodBtnb);
|
||||
return new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -377,7 +377,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
List<Genotype> ret = new ArrayList<Genotype>();
|
||||
ret.add(new BasicGenotype(getLocation(), bestGenotype, refBase, lodBtnb));
|
||||
ret.add(new GeliGenotypeCall(refBase, getLocation(), bestGenotype, lodBtnb));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,10 +8,7 @@ import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
|||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -291,7 +288,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
qual = refQual;
|
||||
else if (lst.get(0).getFields().containsKey("GQ"))
|
||||
qual = Double.valueOf(lst.get(0).getFields().get("GQ")) / 10.0;
|
||||
return new BasicGenotype(this.getLocation(), Utils.join("", lst.get(0).getAlleles()), this.getReference().charAt(0), qual);
|
||||
return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", lst.get(0).getAlleles()), qual, lst.get(0).getSampleName());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
@ -315,7 +312,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
qual = refQual;
|
||||
else if (rec.getFields().containsKey("GQ"))
|
||||
qual = Double.valueOf(rec.getFields().get("GQ")) / 10.0;
|
||||
genotypes.add(new BasicGenotype(this.getLocation(), Utils.join("", rec.getAlleles()), this.getReference().charAt(0), qual));
|
||||
genotypes.add(new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", rec.getAlleles()), qual, rec.getSampleName()));
|
||||
}
|
||||
return genotypes;
|
||||
}
|
||||
|
|
@ -335,7 +332,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
qual = this.getNegLog10PError();
|
||||
else if (record.getFields().containsKey("GQ"))
|
||||
qual = Double.valueOf(record.getFields().get("GQ")) / 10.0;
|
||||
return new BasicGenotype(this.getLocation(), Utils.join("", record.getAlleles()), this.getReference().charAt(0), qual);
|
||||
return new VCFGenotypeCall(this.getReference().charAt(0), this.getLocation(), Utils.join("", record.getAlleles()), qual, record.getSampleName());
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -2,14 +2,13 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
@ -21,7 +20,6 @@ import java.util.List;
|
|||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||
private String dbName;
|
||||
|
||||
private static final int REF = 0;
|
||||
private static final int VAR_HET = 1;
|
||||
|
|
@ -31,138 +29,109 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"};
|
||||
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"};
|
||||
|
||||
private int[][] table = new int[4][4];
|
||||
private int[] truth_totals = new int[4];
|
||||
private int[] calls_totals = new int[4];
|
||||
private ArrayList<String> rodNames = new ArrayList<String>();
|
||||
private HashMap<String, TruthTable> tables = new HashMap<String, TruthTable>();
|
||||
|
||||
public GenotypeConcordance(final String name) {
|
||||
super("genotype_concordance");
|
||||
dbName = name;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
truth_totals[i] = 0;
|
||||
calls_totals[i] = 0;
|
||||
for (int j = 0; j < 4; j++)
|
||||
table[i][j] = 0;
|
||||
}
|
||||
rodNames.add(name);
|
||||
initialize();
|
||||
}
|
||||
|
||||
public void inc(Variation chip, Variation eval, char ref) {
|
||||
if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype))))
|
||||
public GenotypeConcordance(final List<String> names) {
|
||||
super("genotype_concordance");
|
||||
rodNames.addAll(names);
|
||||
initialize();
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
for ( String name : rodNames )
|
||||
tables.put(name, new TruthTable());
|
||||
}
|
||||
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
// get all of the chip rods at this locus
|
||||
HashMap<String, Genotype> chips = new HashMap<String, Genotype>();
|
||||
for ( String name : rodNames ) {
|
||||
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData(name, null);
|
||||
Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0));
|
||||
if ( chip != null ) {
|
||||
if ( !(chip instanceof VariantBackedByGenotype) )
|
||||
throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data");
|
||||
chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype());
|
||||
}
|
||||
}
|
||||
|
||||
if ( eval != null && !(eval instanceof VariantBackedByGenotype) )
|
||||
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||
|
||||
// don't procede if we have no truth data and no call
|
||||
if ( eval != null || chips.size() > 0 )
|
||||
inc(chips, (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList<Genotype>()), ref);
|
||||
return null;
|
||||
}
|
||||
|
||||
public void inc(Map<String, Genotype> chips, List<Genotype> evals, char ref) {
|
||||
|
||||
// This shouldn't happen, but let's check anyways
|
||||
if (BaseUtils.simpleBaseToBaseIndex(ref) == -1)
|
||||
return;
|
||||
|
||||
// get the genotyping data
|
||||
Genotype chipGenotype = null;
|
||||
Genotype evalGenotype = null;
|
||||
if (chip != null) chipGenotype = ((VariantBackedByGenotype)chip).getCalledGenotype();
|
||||
if (eval != null) evalGenotype = ((VariantBackedByGenotype)eval).getCalledGenotype();
|
||||
HashMap<String, Genotype> evalHash = makeGenotypeHash(evals);
|
||||
|
||||
int truthIndex, callIndex;
|
||||
if (chip == null)
|
||||
truthIndex = UNKNOWN;
|
||||
else if (!chipGenotype.isVariant(ref))
|
||||
truthIndex = REF;
|
||||
else if (chipGenotype.isHet())
|
||||
truthIndex = VAR_HET;
|
||||
else
|
||||
truthIndex = VAR_HOM;
|
||||
for ( String name : rodNames ) {
|
||||
Genotype chip = chips.get(name);
|
||||
Genotype eval = evalHash.get(name);
|
||||
|
||||
if (eval == null)
|
||||
callIndex = NO_CALL;
|
||||
else if (!evalGenotype.isVariant(ref))
|
||||
callIndex = REF;
|
||||
else if (evalGenotype.isHet())
|
||||
callIndex = VAR_HET;
|
||||
else
|
||||
callIndex = VAR_HOM;
|
||||
if (chip == null && eval == null)
|
||||
continue;
|
||||
|
||||
int truthType = getGenotypeType(chip, ref);
|
||||
int callType = getGenotypeType(eval, ref);
|
||||
TruthTable table = tables.get(name);
|
||||
|
||||
if (chip != null || eval != null) {
|
||||
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
||||
table[truthIndex][callIndex]++;
|
||||
truth_totals[truthIndex]++;
|
||||
if (callIndex != NO_CALL) calls_totals[callIndex]++;
|
||||
table.addEntry(truthType, callType);
|
||||
}
|
||||
}
|
||||
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
Variation chip = (Variation) tracker.lookup(dbName, null);
|
||||
if (eval != null || chip != null)
|
||||
inc(chip, eval, ref);
|
||||
return null;
|
||||
}
|
||||
|
||||
private void addCalledGenotypeConcordance(List<String> s) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int nConcordantCallsI = table[i][i];
|
||||
String value = "N/A";
|
||||
if (i != UNKNOWN)
|
||||
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
|
||||
sb.append(value);
|
||||
private HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals) {
|
||||
HashMap<String, Genotype> hash = new HashMap<String, Genotype>();
|
||||
for ( Genotype eval : evals ) {
|
||||
if ( eval instanceof SampleBacked )
|
||||
hash.put(((SampleBacked)eval).getSampleName(), eval);
|
||||
else if ( rodNames.size() == 1 )
|
||||
hash.put(rodNames.get(0), eval);
|
||||
else
|
||||
throw new StingException("Genotype data has no associated samples but are multi-sample...?");
|
||||
}
|
||||
s.add(sb.toString());
|
||||
return hash;
|
||||
}
|
||||
|
||||
/** How many overall calls where made that aren't NO_CALLS or UNKNOWNS? */
|
||||
private int getNCalled() {
|
||||
int n = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
|
||||
return n;
|
||||
}
|
||||
private static int getGenotypeType(Genotype g, char ref) {
|
||||
int type;
|
||||
|
||||
private void addOverallStats(List<String> s) {
|
||||
int nConcordantRefCalls = table[REF][REF];
|
||||
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
|
||||
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
|
||||
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
|
||||
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
|
||||
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
|
||||
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
||||
int nCalled = getNCalled();
|
||||
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
||||
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||
if ( g == null )
|
||||
type = NO_CALL;
|
||||
else if ( !g.isVariant(ref) )
|
||||
type = REF;
|
||||
else if ( g.isHet() )
|
||||
type = VAR_HET;
|
||||
else
|
||||
type = VAR_HOM;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
s.add(String.format("name %s", dbName));
|
||||
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
||||
for (int j = 0; j < 4; j++)
|
||||
sb.append(String.format("%9d ", table[i][j]));
|
||||
sb.append(String.format("%9d ", truth_totals[i]));
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
} else {
|
||||
sb.append("\tN/A\t\t\tN/A");
|
||||
}
|
||||
s.add(sb.toString());
|
||||
|
||||
for ( String name : rodNames ) {
|
||||
TruthTable table = tables.get(name);
|
||||
table.addAllStats(s, name);
|
||||
s.add("");
|
||||
}
|
||||
|
||||
addCalledGenotypeConcordance(s);
|
||||
addOverallStats(s);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
||||
}
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
|
@ -173,4 +142,95 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
sb.append("%");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private class TruthTable {
|
||||
int[][] table = new int[4][4];
|
||||
int[] truth_totals = new int[4];
|
||||
int[] calls_totals = new int[4];
|
||||
|
||||
public TruthTable() {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
truth_totals[i] = 0;
|
||||
calls_totals[i] = 0;
|
||||
for (int j = 0; j < 4; j++)
|
||||
table[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public void addEntry(int truthIndex, int callIndex) {
|
||||
table[truthIndex][callIndex]++;
|
||||
truth_totals[truthIndex]++;
|
||||
calls_totals[callIndex]++;
|
||||
}
|
||||
|
||||
public void addAllStats(List<String> s, String name) {
|
||||
s.add(String.format("name %s", name));
|
||||
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
||||
for (int j = 0; j < 4; j++)
|
||||
sb.append(String.format("%9d ", table[i][j]));
|
||||
sb.append(String.format("%9d ", truth_totals[i]));
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
} else {
|
||||
sb.append("\tN/A\t\t\tN/A");
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
|
||||
addCalledGenotypeConcordance(s);
|
||||
addOverallStats(s);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
||||
}
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addCalledGenotypeConcordance(List<String> s) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int nConcordantCallsI = table[i][i];
|
||||
String value = "N/A";
|
||||
if (i != UNKNOWN)
|
||||
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
|
||||
sb.append(value);
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
|
||||
// How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
|
||||
private int getNCalled() {
|
||||
int n = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
|
||||
return n;
|
||||
}
|
||||
|
||||
private void addOverallStats(List<String> s) {
|
||||
int nConcordantRefCalls = table[REF][REF];
|
||||
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
|
||||
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
|
||||
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
|
||||
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
|
||||
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
|
||||
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
||||
int nCalled = getNCalled();
|
||||
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
||||
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import java.util.*;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||
public class PooledGenotypeConcordance extends BasicVariantAnalysis implements PoolAnalysis, GenotypeAnalysis {
|
||||
|
||||
private PooledConcordanceTable table;
|
||||
private String[] hapmapNames;
|
||||
|
|
|
|||
|
|
@ -56,8 +56,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false)
|
||||
public int numPeopleInPool = -1;
|
||||
|
||||
@Argument(fullName = "pathToHapmapPoolFile", shortName="HPF", doc="If using a variant file from a pooled caller on pools of hapmap individuals, this field provides a filepath to the pool construction file listing which hapmap individuals are in which pool", required=false)
|
||||
public String pathToHapmapPoolFile = null;
|
||||
@Argument(fullName = "samplesFile", shortName="samples", doc="When running an analysis on a number of individuals with truth data, this field provides a filepath to the listing of which samples are used (and are used to name corresponding rods with -B)", required=false)
|
||||
public String samplesFile = null;
|
||||
|
||||
String analysisFilenameBase = null;
|
||||
|
||||
|
|
@ -129,7 +129,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
// Add new analyses here!
|
||||
//
|
||||
analyses.add(new ValidationDataAnalysis());
|
||||
analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile));
|
||||
analyses.add(new PooledGenotypeConcordance(samplesFile));
|
||||
analyses.add(new VariantCounter());
|
||||
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
||||
analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName));
|
||||
|
|
@ -149,7 +149,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
VariantAnalysis analysis = iter.next();
|
||||
boolean disableForGenotyping = evalContainsGenotypes && !(analysis instanceof GenotypeAnalysis);
|
||||
boolean disableForPopulation = !evalContainsGenotypes && !(analysis instanceof PopulationAnalysis);
|
||||
boolean disableForPools = (pathToHapmapPoolFile == null && analysis instanceof PooledGenotypeConcordance) || (numPeopleInPool < 1 && analysis instanceof PoolAnalysis);
|
||||
boolean disableForPools = numPeopleInPool < 1 && analysis instanceof PoolAnalysis;
|
||||
boolean disable = disableForGenotyping | disableForPopulation | disableForPools;
|
||||
String causeName = disableForGenotyping ? "population" : (disableForPopulation ? "genotype" : (disableForPools ? "pool" : null));
|
||||
if (disable) {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -44,6 +43,33 @@ public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked
|
|||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public GeliGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError) {
|
||||
mRefBase = ref;
|
||||
mLocation = loc;
|
||||
mBestGenotype = DiploidGenotype.valueOf(genotype);
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
mNextGenotype = mRefGenotype;
|
||||
|
||||
// set general posteriors to min double value
|
||||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
|
||||
// set the ref to PError
|
||||
mPosteriors[mRefGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
|
||||
// set the best genotype to zero (need to do this after ref in case ref=best)
|
||||
mPosteriors[mBestGenotype.ordinal()] = 0.0;
|
||||
|
||||
// choose a smart next best genotype and set it to PError
|
||||
if ( mBestGenotype == mRefGenotype )
|
||||
mNextGenotype = DiploidGenotype.valueOf(BaseUtils.simpleComplement(genotype));
|
||||
else
|
||||
mNextGenotype = mRefGenotype;
|
||||
mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -48,6 +47,33 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public VCFGenotypeCall(char ref, GenomeLoc loc, String genotype, double negLog10PError, String sample) {
|
||||
mRefBase = ref;
|
||||
mLocation = loc;
|
||||
mBestGenotype = DiploidGenotype.valueOf(genotype);
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
mSampleName = sample;
|
||||
|
||||
// set general posteriors to min double value
|
||||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
|
||||
// set the ref to PError
|
||||
mPosteriors[mRefGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
|
||||
// set the best genotype to zero (need to do this after ref in case ref=best)
|
||||
mPosteriors[mBestGenotype.ordinal()] = 0.0;
|
||||
|
||||
// choose a smart next best genotype and set it to PError
|
||||
if ( mBestGenotype == mRefGenotype )
|
||||
mNextGenotype = DiploidGenotype.valueOf(BaseUtils.simpleComplement(genotype));
|
||||
else
|
||||
mNextGenotype = mRefGenotype;
|
||||
mPosteriors[mNextGenotype.ordinal()] = -1.0 * negLog10PError;
|
||||
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testEvalGenotypeROD() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("46c381dad05310267cdfd409228d3692");
|
||||
md5.add("87ec21995ab4227404da1dc144c1f4c7");
|
||||
/**
|
||||
* the above MD5 was calculated after running the following command:
|
||||
*
|
||||
|
|
@ -150,7 +150,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testEvalMarksGenotypingExample() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("cb914e7b65e6561685cccf0cd2cc5dfb");
|
||||
md5.add("e9aebff0e3ccba7ba6f64433ba7d3533");
|
||||
/**
|
||||
* Run with the following commands:
|
||||
*
|
||||
|
|
|
|||
Loading…
Reference in New Issue