diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java index 0baea5907..f08409da0 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodGFF.java @@ -158,7 +158,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant public String getAltBasesFWD() { return null; } public char getAltSnpFWD() throws IllegalStateException { return 0; } public boolean isReference() { return ! isSNP(); } - public boolean isSNP() { return score == Double.NaN || score > 5.0; } + public boolean isSNP() { return false; } public boolean isInsertion() { return false; } public boolean isDeletion() { return false; } public boolean isIndel() { return false; } @@ -168,6 +168,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant public double getVariationConfidence() { return score; } public double getConsensusConfidence() { return score; } public List getGenotype() throws IllegalStateException { + //System.out.printf("feature = %s%n", feature); return Arrays.asList(feature); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java index c4b63e008..76a8b3296 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java @@ -1,12 +1,3 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -import java.io.IOException; -import java.util.List; -import java.util.Arrays; - /* * Copyright (c) 2009 The Broad Institute * @@ -32,6 +23,15 @@ import java.util.Arrays; * OTHER DEALINGS IN THE SOFTWARE. */ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.io.IOException; +import java.util.List; +import java.util.Arrays; + public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant { public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } public GenomeLoc loc; @@ -43,7 +43,9 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa public double lodBtnb; public double[] genotypeLikelihoods = new double[10]; - public rodVariants(final String name) { super(name); } + public rodVariants(final String name) { + super(name); + } public String delimiterRegex() { return "\\s+"; } @@ -90,8 +92,81 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa ); } + public int compareTo(ReferenceOrderedDatum referenceOrderedDatum) { + return 0; //To change body of implemented methods use File | Settings | File Templates. + } + public GenomeLoc getLocation() { return loc; } + public String getRefBasesFWD() { + return String.format("%c", getRefSnpFWD()); + } + + public char getRefSnpFWD() throws IllegalStateException { + return refBase; + } + + public String getAltBasesFWD() { + return String.format("%c", getAltSnpFWD()); + } + + public char getAltSnpFWD() throws IllegalStateException { + return (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0); + } + + public boolean isReference() { + return refBase == bestGenotype.charAt(0); + } + + public boolean isSNP() { + return !isReference(); + } + + public boolean isInsertion() { + return false; + } + + public boolean isDeletion() { + return false; + } + + public boolean isIndel() { + return false; + } + + public double getMAF() { + return 0; + } + + public double getHeterozygosity() { + return 0; + } + + public boolean isGenotype() { + return true; + } + + public double getVariationConfidence() { + return lodBtr; + } + + public double getConsensusConfidence() { + return lodBtnb; + } + + public List getGenotype() throws IllegalStateException { + return Arrays.asList(getBestGenotype()); + //throw new IllegalStateException("huh?"); + } + + public int getPloidy() throws IllegalStateException { + return 2; + } + + public boolean isBiallelic() { + return true; + } + public char getReferenceBase() { return refBase; } public int getPileupDepth() { return depth; } @@ -142,6 +217,7 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa this.lodBtnb = (bestLikelihood - nextBestLikelihood); } + /* public String getRefBasesFWD() { char[] b = { getReferenceBase() }; return new String( b ); @@ -173,4 +249,5 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa public int getPloidy() throws IllegalStateException { return 2; } public boolean isBiallelic() { return true; } + */ } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java index 3c5e35094..b2f2b1352 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; import java.util.List; import java.util.ArrayList; @@ -36,50 +37,58 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp public GenotypeConcordance(final String name) { super("genotype_concordance"); dbName = name; + for ( int i = 0; i < 4; i++ ) { + for ( int j = 0; j < 5; j++ ) { + table[i][j] = 0; + } + } } - public void inc(AllelicVariant chip, AllelicVariant eval) { + public void inc(AllelicVariant chip, AllelicVariant eval, char ref) { if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) ) throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); int truthIndex, callIndex; if ( chip == null ) truthIndex = TRUTH_UNKNOWN; - else if ( chip.isReference() ) + else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() ) truthIndex = TRUTH_REF; else if ( isHet(chip) ) truthIndex = TRUTH_VAR_HET; else truthIndex = TRUTH_VAR_HOM; + // todo -- FIXME on countOccurences if ( eval == null ) callIndex = UNCALLABLE; - else if ( eval.getVariationConfidence() < 5.0 ) - callIndex = CALL_NO_CONF; - else if ( eval.isReference() ) + else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() ) callIndex = CALL_REF; else if ( isHet(eval) ) callIndex = CALL_VAR_HET; else callIndex = CALL_VAR_HOM; - table[truthIndex][callIndex]++; + if ( chip != null || eval != null ) { + System.out.printf("TESTING ME: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval); + + table[truthIndex][callIndex]++; + } } public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) { AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null); - inc(chip, eval); - return chip == null && eval != null ? "Novel " + eval : null; + inc(chip, eval, ref); + return null; } public List done() { List s = new ArrayList(); s.add(String.format("name %s", dbName)); - s.add(String.format("\t\tCALLED REF\tCALLED VAR_HET\tCALLED_VAR_HOM\tNO CONF\tUNCALLABLE")); - s.add(String.format("IS REF\t%d\t%d\t%d\t%d\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE])); - s.add(String.format("IS VAR_HET\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE])); - s.add(String.format("IS VAR_HOM\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE])); - s.add(String.format("UNKNOWN\t%d\t%d\t%d\t%d\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE])); + s.add(String.format("\t\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CONF\tUNCALLABLE")); + s.add(String.format("IS_REF\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE])); + s.add(String.format("IS_VAR_HET\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE])); + s.add(String.format("IS_VAR_HOM\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE])); + s.add(String.format("UNKNOWN\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE])); return s; } @@ -88,9 +97,12 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp return ((Genotype)var).isHet(); List genotype = var.getGenotype(); - if ( genotype.size() < 2 ) + if ( genotype.size() < 1 ) return false; - return (genotype.get(0) != genotype.get(1)); + + boolean het = genotype.get(0).charAt(0) != genotype.get(0).charAt(1); + System.out.printf("********* Genotype %s is het = %b%n", genotype, het); + return het; } } \ No newline at end of file