Bug fixes to support hapmap genotyping concordance.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1285 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-07-21 16:20:10 +00:00
parent 7e04313b4e
commit 436a196e2b
3 changed files with 116 additions and 26 deletions

View File

@ -158,7 +158,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
public String getAltBasesFWD() { return null; } public String getAltBasesFWD() { return null; }
public char getAltSnpFWD() throws IllegalStateException { return 0; } public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); } public boolean isReference() { return ! isSNP(); }
public boolean isSNP() { return score == Double.NaN || score > 5.0; } public boolean isSNP() { return false; }
public boolean isInsertion() { return false; } public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; } public boolean isDeletion() { return false; }
public boolean isIndel() { return false; } public boolean isIndel() { return false; }
@ -168,6 +168,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
public double getVariationConfidence() { return score; } public double getVariationConfidence() { return score; }
public double getConsensusConfidence() { return score; } public double getConsensusConfidence() { return score; }
public List<String> getGenotype() throws IllegalStateException { public List<String> getGenotype() throws IllegalStateException {
//System.out.printf("feature = %s%n", feature);
return Arrays.asList(feature); return Arrays.asList(feature);
} }

View File

@ -1,12 +1,3 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
import java.util.List;
import java.util.Arrays;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
* *
@ -32,6 +23,15 @@ import java.util.Arrays;
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
import java.util.List;
import java.util.Arrays;
public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant { public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant {
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
public GenomeLoc loc; public GenomeLoc loc;
@ -43,7 +43,9 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
public double lodBtnb; public double lodBtnb;
public double[] genotypeLikelihoods = new double[10]; public double[] genotypeLikelihoods = new double[10];
public rodVariants(final String name) { super(name); } public rodVariants(final String name) {
super(name);
}
public String delimiterRegex() { return "\\s+"; } public String delimiterRegex() { return "\\s+"; }
@ -90,8 +92,81 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
); );
} }
public int compareTo(ReferenceOrderedDatum referenceOrderedDatum) {
return 0; //To change body of implemented methods use File | Settings | File Templates.
}
public GenomeLoc getLocation() { return loc; } public GenomeLoc getLocation() { return loc; }
public String getRefBasesFWD() {
return String.format("%c", getRefSnpFWD());
}
public char getRefSnpFWD() throws IllegalStateException {
return refBase;
}
public String getAltBasesFWD() {
return String.format("%c", getAltSnpFWD());
}
public char getAltSnpFWD() throws IllegalStateException {
return (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
}
public boolean isReference() {
return refBase == bestGenotype.charAt(0);
}
public boolean isSNP() {
return !isReference();
}
public boolean isInsertion() {
return false;
}
public boolean isDeletion() {
return false;
}
public boolean isIndel() {
return false;
}
public double getMAF() {
return 0;
}
public double getHeterozygosity() {
return 0;
}
public boolean isGenotype() {
return true;
}
public double getVariationConfidence() {
return lodBtr;
}
public double getConsensusConfidence() {
return lodBtnb;
}
public List<String> getGenotype() throws IllegalStateException {
return Arrays.asList(getBestGenotype());
//throw new IllegalStateException("huh?");
}
public int getPloidy() throws IllegalStateException {
return 2;
}
public boolean isBiallelic() {
return true;
}
public char getReferenceBase() { return refBase; } public char getReferenceBase() { return refBase; }
public int getPileupDepth() { return depth; } public int getPileupDepth() { return depth; }
@ -142,6 +217,7 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
this.lodBtnb = (bestLikelihood - nextBestLikelihood); this.lodBtnb = (bestLikelihood - nextBestLikelihood);
} }
/*
public String getRefBasesFWD() { public String getRefBasesFWD() {
char[] b = { getReferenceBase() }; char[] b = { getReferenceBase() };
return new String( b ); return new String( b );
@ -173,4 +249,5 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
public int getPloidy() throws IllegalStateException { return 2; } public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; } public boolean isBiallelic() { return true; }
*/
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import java.util.List; import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
@ -36,50 +37,58 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
public GenotypeConcordance(final String name) { public GenotypeConcordance(final String name) {
super("genotype_concordance"); super("genotype_concordance");
dbName = name; dbName = name;
for ( int i = 0; i < 4; i++ ) {
for ( int j = 0; j < 5; j++ ) {
table[i][j] = 0;
}
}
} }
public void inc(AllelicVariant chip, AllelicVariant eval) { public void inc(AllelicVariant chip, AllelicVariant eval, char ref) {
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) ) if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
int truthIndex, callIndex; int truthIndex, callIndex;
if ( chip == null ) if ( chip == null )
truthIndex = TRUTH_UNKNOWN; truthIndex = TRUTH_UNKNOWN;
else if ( chip.isReference() ) else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
truthIndex = TRUTH_REF; truthIndex = TRUTH_REF;
else if ( isHet(chip) ) else if ( isHet(chip) )
truthIndex = TRUTH_VAR_HET; truthIndex = TRUTH_VAR_HET;
else else
truthIndex = TRUTH_VAR_HOM; truthIndex = TRUTH_VAR_HOM;
// todo -- FIXME on countOccurences
if ( eval == null ) if ( eval == null )
callIndex = UNCALLABLE; callIndex = UNCALLABLE;
else if ( eval.getVariationConfidence() < 5.0 ) else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
callIndex = CALL_NO_CONF;
else if ( eval.isReference() )
callIndex = CALL_REF; callIndex = CALL_REF;
else if ( isHet(eval) ) else if ( isHet(eval) )
callIndex = CALL_VAR_HET; callIndex = CALL_VAR_HET;
else else
callIndex = CALL_VAR_HOM; callIndex = CALL_VAR_HOM;
if ( chip != null || eval != null ) {
System.out.printf("TESTING ME: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
table[truthIndex][callIndex]++; table[truthIndex][callIndex]++;
} }
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) { public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null); AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
inc(chip, eval); inc(chip, eval, ref);
return chip == null && eval != null ? "Novel " + eval : null; return null;
} }
public List<String> done() { public List<String> done() {
List<String> s = new ArrayList<String>(); List<String> s = new ArrayList<String>();
s.add(String.format("name %s", dbName)); s.add(String.format("name %s", dbName));
s.add(String.format("\t\tCALLED REF\tCALLED VAR_HET\tCALLED_VAR_HOM\tNO CONF\tUNCALLABLE")); s.add(String.format("\t\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CONF\tUNCALLABLE"));
s.add(String.format("IS REF\t%d\t%d\t%d\t%d\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE])); s.add(String.format("IS_REF\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE]));
s.add(String.format("IS VAR_HET\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE])); s.add(String.format("IS_VAR_HET\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE]));
s.add(String.format("IS VAR_HOM\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE])); s.add(String.format("IS_VAR_HOM\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE]));
s.add(String.format("UNKNOWN\t%d\t%d\t%d\t%d\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE])); s.add(String.format("UNKNOWN\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE]));
return s; return s;
} }
@ -88,9 +97,12 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
return ((Genotype)var).isHet(); return ((Genotype)var).isHet();
List<String> genotype = var.getGenotype(); List<String> genotype = var.getGenotype();
if ( genotype.size() < 2 ) if ( genotype.size() < 1 )
return false; return false;
return (genotype.get(0) != genotype.get(1));
boolean het = genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
System.out.printf("********* Genotype %s is het = %b%n", genotype, het);
return het;
} }
} }