Bug fixes to support hapmap genotyping concordance.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1285 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7e04313b4e
commit
436a196e2b
|
|
@ -158,7 +158,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
|
|||
public String getAltBasesFWD() { return null; }
|
||||
public char getAltSnpFWD() throws IllegalStateException { return 0; }
|
||||
public boolean isReference() { return ! isSNP(); }
|
||||
public boolean isSNP() { return score == Double.NaN || score > 5.0; }
|
||||
public boolean isSNP() { return false; }
|
||||
public boolean isInsertion() { return false; }
|
||||
public boolean isDeletion() { return false; }
|
||||
public boolean isIndel() { return false; }
|
||||
|
|
@ -168,6 +168,7 @@ public class rodGFF extends BasicReferenceOrderedDatum implements AllelicVariant
|
|||
public double getVariationConfidence() { return score; }
|
||||
public double getConsensusConfidence() { return score; }
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
//System.out.printf("feature = %s%n", feature);
|
||||
return Arrays.asList(feature);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,3 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -32,6 +23,15 @@ import java.util.Arrays;
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant {
|
||||
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
public GenomeLoc loc;
|
||||
|
|
@ -43,7 +43,9 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
public double lodBtnb;
|
||||
public double[] genotypeLikelihoods = new double[10];
|
||||
|
||||
public rodVariants(final String name) { super(name); }
|
||||
public rodVariants(final String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public String delimiterRegex() { return "\\s+"; }
|
||||
|
||||
|
|
@ -90,8 +92,81 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
);
|
||||
}
|
||||
|
||||
public int compareTo(ReferenceOrderedDatum referenceOrderedDatum) {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
public String getRefBasesFWD() {
|
||||
return String.format("%c", getRefSnpFWD());
|
||||
}
|
||||
|
||||
public char getRefSnpFWD() throws IllegalStateException {
|
||||
return refBase;
|
||||
}
|
||||
|
||||
public String getAltBasesFWD() {
|
||||
return String.format("%c", getAltSnpFWD());
|
||||
}
|
||||
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
return (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return refBase == bestGenotype.charAt(0);
|
||||
}
|
||||
|
||||
public boolean isSNP() {
|
||||
return !isReference();
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public double getMAF() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public double getHeterozygosity() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public boolean isGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public double getVariationConfidence() {
|
||||
return lodBtr;
|
||||
}
|
||||
|
||||
public double getConsensusConfidence() {
|
||||
return lodBtnb;
|
||||
}
|
||||
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
return Arrays.asList(getBestGenotype());
|
||||
//throw new IllegalStateException("huh?");
|
||||
}
|
||||
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
return 2;
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public char getReferenceBase() { return refBase; }
|
||||
|
||||
public int getPileupDepth() { return depth; }
|
||||
|
|
@ -142,6 +217,7 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
this.lodBtnb = (bestLikelihood - nextBestLikelihood);
|
||||
}
|
||||
|
||||
/*
|
||||
public String getRefBasesFWD() {
|
||||
char[] b = { getReferenceBase() };
|
||||
return new String( b );
|
||||
|
|
@ -173,4 +249,5 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
|
|||
|
||||
public int getPloidy() throws IllegalStateException { return 2; }
|
||||
public boolean isBiallelic() { return true; }
|
||||
*/
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -36,50 +37,58 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
public GenotypeConcordance(final String name) {
|
||||
super("genotype_concordance");
|
||||
dbName = name;
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
for ( int j = 0; j < 5; j++ ) {
|
||||
table[i][j] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void inc(AllelicVariant chip, AllelicVariant eval) {
|
||||
public void inc(AllelicVariant chip, AllelicVariant eval, char ref) {
|
||||
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
|
||||
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||
|
||||
int truthIndex, callIndex;
|
||||
if ( chip == null )
|
||||
truthIndex = TRUTH_UNKNOWN;
|
||||
else if ( chip.isReference() )
|
||||
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
|
||||
truthIndex = TRUTH_REF;
|
||||
else if ( isHet(chip) )
|
||||
truthIndex = TRUTH_VAR_HET;
|
||||
else
|
||||
truthIndex = TRUTH_VAR_HOM;
|
||||
|
||||
// todo -- FIXME on countOccurences
|
||||
if ( eval == null )
|
||||
callIndex = UNCALLABLE;
|
||||
else if ( eval.getVariationConfidence() < 5.0 )
|
||||
callIndex = CALL_NO_CONF;
|
||||
else if ( eval.isReference() )
|
||||
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
|
||||
callIndex = CALL_REF;
|
||||
else if ( isHet(eval) )
|
||||
callIndex = CALL_VAR_HET;
|
||||
else
|
||||
callIndex = CALL_VAR_HOM;
|
||||
|
||||
table[truthIndex][callIndex]++;
|
||||
if ( chip != null || eval != null ) {
|
||||
System.out.printf("TESTING ME: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
||||
|
||||
table[truthIndex][callIndex]++;
|
||||
}
|
||||
}
|
||||
|
||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
|
||||
inc(chip, eval);
|
||||
return chip == null && eval != null ? "Novel " + eval : null;
|
||||
inc(chip, eval, ref);
|
||||
return null;
|
||||
}
|
||||
|
||||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
s.add(String.format("name %s", dbName));
|
||||
s.add(String.format("\t\tCALLED REF\tCALLED VAR_HET\tCALLED_VAR_HOM\tNO CONF\tUNCALLABLE"));
|
||||
s.add(String.format("IS REF\t%d\t%d\t%d\t%d\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE]));
|
||||
s.add(String.format("IS VAR_HET\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE]));
|
||||
s.add(String.format("IS VAR_HOM\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE]));
|
||||
s.add(String.format("UNKNOWN\t%d\t%d\t%d\t%d\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE]));
|
||||
s.add(String.format("\t\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CONF\tUNCALLABLE"));
|
||||
s.add(String.format("IS_REF\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE]));
|
||||
s.add(String.format("IS_VAR_HET\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE]));
|
||||
s.add(String.format("IS_VAR_HOM\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE]));
|
||||
s.add(String.format("UNKNOWN\t\t%d\t\t%d\t\t%d\t\t%d\t\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE]));
|
||||
return s;
|
||||
}
|
||||
|
||||
|
|
@ -88,9 +97,12 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
return ((Genotype)var).isHet();
|
||||
|
||||
List<String> genotype = var.getGenotype();
|
||||
if ( genotype.size() < 2 )
|
||||
if ( genotype.size() < 1 )
|
||||
return false;
|
||||
|
||||
return (genotype.get(0) != genotype.get(1));
|
||||
|
||||
boolean het = genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
|
||||
System.out.printf("********* Genotype %s is het = %b%n", genotype, het);
|
||||
return het;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue