Bug fixes to support hapmap genotyping concordance.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1285 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-07-21 16:20:10 +00:00
parent 7e04313b4e
commit 436a196e2b
3 changed files with 116 additions and 26 deletions

View File

@ -158,7 +158,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
public String getAltBasesFWD() { return null; }
public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); }
public boolean isSNP() { return score == Double.NaN || score > 5.0; }
public boolean isSNP() { return false; }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
@ -168,6 +168,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
public double getVariationConfidence() { return score; }
public double getConsensusConfidence() { return score; }
public List<String> getGenotype() throws IllegalStateException {
//System.out.printf("feature = %s%n", feature);
return Arrays.asList(feature);
}

View File

@ -1,12 +1,3 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
import java.util.List;
import java.util.Arrays;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -32,6 +23,15 @@ import java.util.Arrays;
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
import java.util.List;
import java.util.Arrays;
public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant {
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
public GenomeLoc loc;
@ -43,7 +43,9 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
public double lodBtnb;
public double[] genotypeLikelihoods = new double[10];
public rodVariants(final String name) { super(name); }
public rodVariants(final String name) {
super(name);
}
public String delimiterRegex() { return "\\s+"; }
@ -90,8 +92,81 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
);
}
public int compareTo(ReferenceOrderedDatum referenceOrderedDatum) {
return 0; //To change body of implemented methods use File | Settings | File Templates.
}
public GenomeLoc getLocation() { return loc; }
public String getRefBasesFWD() {
return String.format("%c", getRefSnpFWD());
}
public char getRefSnpFWD() throws IllegalStateException {
return refBase;
}
public String getAltBasesFWD() {
return String.format("%c", getAltSnpFWD());
}
public char getAltSnpFWD() throws IllegalStateException {
return (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
}
public boolean isReference() {
return refBase == bestGenotype.charAt(0);
}
public boolean isSNP() {
return !isReference();
}
public boolean isInsertion() {
return false;
}
public boolean isDeletion() {
return false;
}
public boolean isIndel() {
return false;
}
public double getMAF() {
return 0;
}
public double getHeterozygosity() {
return 0;
}
public boolean isGenotype() {
return true;
}
public double getVariationConfidence() {
return lodBtr;
}
public double getConsensusConfidence() {
return lodBtnb;
}
public List<String> getGenotype() throws IllegalStateException {
return Arrays.asList(getBestGenotype());
//throw new IllegalStateException("huh?");
}
public int getPloidy() throws IllegalStateException {
return 2;
}
public boolean isBiallelic() {
return true;
}
public char getReferenceBase() { return refBase; }
public int getPileupDepth() { return depth; }
@ -142,6 +217,7 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
this.lodBtnb = (bestLikelihood - nextBestLikelihood);
}
/*
public String getRefBasesFWD() {
char[] b = { getReferenceBase() };
return new String( b );
@ -173,4 +249,5 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
*/
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.ArrayList;
@ -36,50 +37,58 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
public GenotypeConcordance(final String name) {
super("genotype_concordance");
dbName = name;
for ( int i = 0; i < 4; i++ ) {
for ( int j = 0; j < 5; j++ ) {
table[i][j] = 0;
}
}
}
public void inc(AllelicVariant chip, AllelicVariant eval) {
public void inc(AllelicVariant chip, AllelicVariant eval, char ref) {
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
int truthIndex, callIndex;
if ( chip == null )
truthIndex = TRUTH_UNKNOWN;
else if ( chip.isReference() )
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
truthIndex = TRUTH_REF;
else if ( isHet(chip) )
truthIndex = TRUTH_VAR_HET;
else
truthIndex = TRUTH_VAR_HOM;
// todo -- FIXME on countOccurences
if ( eval == null )
callIndex = UNCALLABLE;
else if ( eval.getVariationConfidence() < 5.0 )
callIndex = CALL_NO_CONF;
else if ( eval.isReference() )
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
callIndex = CALL_REF;
else if ( isHet(eval) )
callIndex = CALL_VAR_HET;
else
callIndex = CALL_VAR_HOM;
table[truthIndex][callIndex]++;
if ( chip != null || eval != null ) {
System.out.printf("TESTING ME: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
table[truthIndex][callIndex]++;
}
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
inc(chip, eval);
return chip == null && eval != null ? "Novel " + eval : null;
inc(chip, eval, ref);
return null;
}
public List<String> done() {
List<String> s = new ArrayList<String>();
s.add(String.format("name %s", dbName));
s.add(String.format("\t\tCALLED REF\tCALLED VAR_HET\tCALLED_VAR_HOM\tNO CONF\tUNCALLABLE"));
s.add(String.format("IS REF\t%d\t%d\t%d\t%d\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE]));
s.add(String.format("IS VAR_HET\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE]));
s.add(String.format("IS VAR_HOM\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE]));
s.add(String.format("UNKNOWN\t%d\t%d\t%d\t%d\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE]));
s.add(String.format("\t\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CONF\tUNCALLABLE"));
s.add(String.format("IS_REF\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE]));
s.add(String.format("IS_VAR_HET\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE]));
s.add(String.format("IS_VAR_HOM\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE]));
s.add(String.format("UNKNOWN\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE]));
return s;
}
@ -88,9 +97,12 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
return ((Genotype)var).isHet();
List<String> genotype = var.getGenotype();
if ( genotype.size() < 2 )
if ( genotype.size() < 1 )
return false;
return (genotype.get(0) != genotype.get(1));
boolean het = genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
System.out.printf("********* Genotype %s is het = %b%n", genotype, het);
return het;
}
}