incremental clean-up and changes for VariantEval, moved DiploidGenotype to a better home, and fixed a spelling error.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1624 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-15 04:48:42 +00:00
parent 6783fda42a
commit b401929e41
31 changed files with 369 additions and 184 deletions

View File

@ -119,8 +119,8 @@ public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return mRecord.getRefBase().toChar();
public String getReference() {
return mRecord.getRefBase().toString();
}
/**
@ -154,12 +154,26 @@ public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
}
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
*
* @return a char, representing the alternate base
*/
@Override
public char getReferenceForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(0);
return getAlternateBases().charAt(1);
}
/**
* Is this variant a SNP?
*

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
@ -38,7 +38,10 @@ import java.util.Arrays;
import java.util.List;
public class RodGeliText extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
public enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
public enum Genotype {
AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
}
public GenomeLoc loc;
public char refBase = 'N';
public int depth;
@ -47,15 +50,17 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
public double lodBtr;
public double lodBtnb;
public double[] genotypeLikelihoods = new double[10];
public RodGeliText(final String name) {
super(name);
}
public String delimiterRegex() { return "\\s+"; }
public String delimiterRegex() {
return "\\s+";
}
public boolean parseLine(Object header, String[] parts) throws IOException {
if ( parts.length < 18 )
if (parts.length < 18)
throw new IOException("Invalid rodVariant row found -- too few elements. Expected 18+, got " + parts.length);
if (!parts[0].startsWith("#")) {
loc = GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1]));
@ -78,28 +83,30 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
public String toString() {
return String.format("%s\t%d\t%c\t%d\t%d\t%s\t%4.4f\t%4.4f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f",
loc.getContig(),
loc.getStart(),
refBase,
depth,
maxMappingQuality,
bestGenotype,
lodBtr,
lodBtnb,
genotypeLikelihoods[0],
genotypeLikelihoods[1],
genotypeLikelihoods[2],
genotypeLikelihoods[3],
genotypeLikelihoods[4],
genotypeLikelihoods[5],
genotypeLikelihoods[6],
genotypeLikelihoods[7],
genotypeLikelihoods[8],
genotypeLikelihoods[9]
loc.getContig(),
loc.getStart(),
refBase,
depth,
maxMappingQuality,
bestGenotype,
lodBtr,
lodBtnb,
genotypeLikelihoods[0],
genotypeLikelihoods[1],
genotypeLikelihoods[2],
genotypeLikelihoods[3],
genotypeLikelihoods[4],
genotypeLikelihoods[5],
genotypeLikelihoods[6],
genotypeLikelihoods[7],
genotypeLikelihoods[8],
genotypeLikelihoods[9]
);
}
public GenomeLoc getLocation() { return loc; }
public GenomeLoc getLocation() {
return loc;
}
/**
* get the reference base(s) at this position
@ -107,8 +114,8 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return this.refBase;
public String getReference() {
return String.valueOf(this.refBase);
}
@ -161,7 +168,9 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
}
public boolean isSNP() {
return (!bestGenotype.equals(Utils.dupString(this.getReference(),2)));
if (this.getReference().length() == 1)
return (!bestGenotype.equals(this.getReference() + this.getReference()));
return false;
}
public boolean isInsertion() {
@ -195,12 +204,28 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if(this.bestGenotype.toString().charAt(0) == this.getReference())
// we know that if we're a SNP, the reference is a single base
if (this.bestGenotype.toString().charAt(0) == getReference().charAt(0))
return this.bestGenotype.toString().charAt(1);
return this.bestGenotype.toString().charAt(0);
}
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
*
* @return a char, representing the alternate base
*/
@Override
public char getReferenceForSNP() {
if (!isSNP()) throw new IllegalStateException("This site is not a SNP");
// we know that if we're a SNP, the reference is a single base
if (bestGenotype.toString().charAt(0) != getReference().charAt(0))
return bestGenotype.toString().charAt(1);
else
return bestGenotype.toString().charAt(0);
}
public double getMAF() {
return 0;
}
@ -235,21 +260,37 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
}
public int length() { return 1; }
public int length() {
return 1;
}
public char getReferenceBase() { return refBase; }
public char getReferenceBase() {
return refBase;
}
public int getPileupDepth() { return depth; }
public int getPileupDepth() {
return depth;
}
public int getMaxMappingQuality() { return maxMappingQuality; }
public int getMaxMappingQuality() {
return maxMappingQuality;
}
public String getBestGenotype() { return bestGenotype; }
public String getBestGenotype() {
return bestGenotype;
}
public double getLodBtr() { return lodBtr; }
public double getLodBtr() {
return lodBtr;
}
public double getLodBtnb() { return lodBtnb; }
public double getLodBtnb() {
return lodBtnb;
}
public double[] getGenotypeLikelihoods() { return genotypeLikelihoods; }
public double[] getGenotypeLikelihoods() {
return genotypeLikelihoods;
}
public void adjustLikelihoods(double[] likelihoods) {
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
@ -277,7 +318,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
if (refBase == Genotype.values()[likelihoodIndex].toString().charAt(0) &&
refBase == Genotype.values()[likelihoodIndex].toString().charAt(1)) {
refBase == Genotype.values()[likelihoodIndex].toString().charAt(1)) {
refLikelihood = genotypeLikelihoods[likelihoodIndex];
}
}
@ -287,40 +328,6 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
this.lodBtnb = (bestLikelihood - nextBestLikelihood);
}
/*
public String getRefBasesFWD() {
char[] b = { getReferenceBase() };
return new String( b );
}
public char getRefSnpFWD() throws IllegalStateException { return getReferenceBase(); }
public String getAltBasesFWD() { return getBestGenotype(); }
public char getAltSnpFWD() throws IllegalStateException {
String bases = getBestGenotype();
if ( bases.charAt(0) != getRefSnpFWD() )
return bases.charAt(0);
else
return bases.charAt(1);
}
public boolean isReference() { return ! isSNP(); }
public boolean isSNP() { return getLodBtr() > 5; }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
public double getMAF() { return 0; }
public double getHeterozygosity() { return 0; }
public boolean isGenotype() { return true; }
public double getVariationConfidence() { return getLodBtr(); }
public double getConsensusConfidence() { return getLodBtnb(); }
public List<String> getGenotype() throws IllegalStateException {
return Arrays.asList(getBestGenotype());
}
public int getPloidy() throws IllegalStateException { return 2; }
public boolean isBiallelic() { return true; }
*/
/**
* get the likelihoods
*
@ -328,7 +335,24 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
*/
@Override
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.bestGenotype,this.getRefSnpFWD(),this.getConsensusConfidence());
// figure out the best -switch to this
/*Integer values[] = Utils.SortPermutation(this.genotypeLikelihoods);
int indexThis = x.ordinal();
int other = (x.toString().equals(Genotype.values()[values[1]])) ? values[9] : values[8];
double lod = (genotypeLikelihoods[indexThis] - genotypeLikelihoods[other]);*/
return new BasicGenotype(getLocation(),x.toString(), refBase, lodBtnb);
}
/**
* do we have the specified genotype? not all backedByGenotypes
* have all the genotype data.
*
* @param x the genotype
*
* @return true if available, false otherwise
*/
@Override
public boolean hasGenotype(DiploidGenotype x) {
return (x.toString().equals(this.getAltBasesFWD()));
}
}

View File

@ -4,7 +4,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.*;
import java.util.regex.MatchResult;
@ -82,8 +82,8 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return 'N';
public String getReference() {
throw new IllegalStateException("Chip data is unable to determine the reference");
}
/**
@ -223,6 +223,16 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
return this.getAltSnpFWD();
}
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
*
* @return a char, representing the alternate base
*/
@Override
public char getReferenceForSNP() {
return this.getRefSnpFWD();
}
public double getMAF() { return 0; }
public double getHeterozygosity() { return 0; }
public boolean isGenotype() { return true; }
@ -247,4 +257,18 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
}
/**
* do we have the specified genotype? not all backedByGenotypes
* have all the genotype data.
*
* @param x the genotype
*
* @return true if available, false otherwise
*/
@Override
public boolean hasGenotype(DiploidGenotype x) {
if (!x.toString().equals(this.getAltBasesFWD())) return false;
return true;
}
}

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata;
import net.sf.picard.util.SequenceUtil;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.util.Arrays;
import java.util.List;
@ -68,8 +68,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
* @return the reference base or bases, as a string
*/
@Override
public char getReference() {
return getRefSnpFWD();
public String getReference() {
return getRefBasesFWD();
}
/**
@ -183,6 +183,16 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
return getAlternateBases().charAt(0); */
}
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
*
* @return a char, representing the alternate base
*/
@Override
public char getReferenceForSNP() {
return 0; //To change body of implemented methods use File | Settings | File Templates.
}
public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
@ -314,7 +324,20 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
*/
@Override
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
if (x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
return new BasicGenotype(this.getLocation(),this.getAltBasesFWD(),this.getRefSnpFWD(),this.getConsensusConfidence());
}
/**
* do we have the specified genotype? not all backedByGenotypes
* have all the genotype data.
*
* @param x the genotype
*
* @return true if available, false otherwise
*/
@Override
public boolean hasGenotype(DiploidGenotype x) {
return (!x.toString().equals(this.getAltBasesFWD())) ? false : true;
}
}

View File

@ -2,15 +2,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.Arrays;
import net.sf.samtools.SAMRecord;
public class DiploidGenotypePriors {
// --------------------------------------------------------------------------------------------------------------
//

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import static java.lang.Math.log10;
import static java.lang.Math.pow;

View File

@ -225,7 +225,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
*/
public Variation toVariation() {
double bestRef = Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getRefGenotype()));
return new BasicVariation(this.getBases(), this.getReference(), 0, this.mLocation, bestRef);
return new BasicVariation(this.getBases(), String.valueOf(this.getReference()), 0, this.mLocation, bestRef);
}
/**

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ListUtils;
import org.broadinstitute.sting.utils.Utils;

View File

@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.io.BufferedReader;

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.io.PrintStream;
import java.util.List;
@ -53,6 +53,6 @@ public abstract class BasicPoolVariantAnalysis implements VariantAnalysis{
// no need to finalize data in general
}
public abstract String update(AllelicVariant variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
public abstract String update(Variation variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context);
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.Genotype;
@ -78,13 +78,8 @@ public class CallableBasesAnalysis extends BasicVariantAnalysis implements Genot
discoverable_bases[i]++;
if (!eval.isSNP() && genotype.getNegLog10PError() >= threshold)
discoverable_bases[i]++;
if (genotype.getNegLog10PError() >= threshold)
genotypable_bases[i]++;
//System.out.printf("Updating %s SNP=%b, REF=%b VarConf=%f ConConf=%f where threshold=%f: discoverable = %d, genotypable = %d%n",
// eval.getLocation(), eval.isSNP(), eval.isReference(), eval.getVariationConfidence(),
// eval.getConsensusConfidence(), threshold, discoverable_bases[i], genotypable_bases[i]);
}
return null;

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;

View File

@ -1,13 +1,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.io.BufferedReader;
import java.io.FileReader;

View File

@ -27,21 +27,13 @@ public class TransitionTranversionAnalysis extends BasicVariantAnalysis implemen
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if (eval != null && eval.isSNP()) {
if (eval.getAlternateBases().length() != 2) {
throw new StingException("TransitionTranversionAnalysis works only with biallelic variants");
}
char refBase = eval.getReference();
char altBase = (eval.getAlternateBases().charAt(0) == refBase) ? eval.getAlternateBases().charAt(1) : eval.getAlternateBases().charAt(0);
BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(refBase, altBase);
char altBase = eval.getAlternativeBaseForSNP();
BaseUtils.BaseSubstitutionType subType = BaseUtils.SNPSubstitutionType(ref, altBase);
if (subType == BaseUtils.BaseSubstitutionType.TRANSITION)
nTransitions++;
else
nTransversions++;
}
return null;
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;

View File

@ -82,27 +82,11 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
public int nSNPsAtIndels() {
return nSNPsCalledAtIndels;
}
public static int count = 0;
public boolean discordantP(Variation dbSNP, Variation eval) {
if (eval != null) {
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference();
if (dbSNP != null && dbSNP.isSNP()) {
boolean val = !dbSNP.getAlternateBases().contains(String.valueOf(alt));
if (val) {
//System.err.print(eval.getLocation());
//System.err.print(" eval.alt = " + alt);
//System.err.println(" dbsnp.bases = " + dbSNP.getAlternateBases());
//System.err.println(val);
//System.err.println(count);
}
//else System.err.println("not count == " + count++);
return val;
}
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference().charAt(0);
if (dbSNP != null && dbSNP.isSNP())
return !dbSNP.getAlternateBases().contains(String.valueOf(alt));
}
return false;
}
@ -136,7 +120,6 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
} else {
return null;
// return "Novel " + eval;
}
} else {
return null;

View File

@ -2,8 +2,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -182,17 +182,8 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
// Iterate over each analysis, and update it
Variation eval = (Variation)tracker.lookup("eval", null);
//if ( eval!=null ) System.out.printf("Eval: %f %d %b%n", eval.getVariationConfidence(), minDiscoveryQ, eval.getVariationConfidence() < minDiscoveryQ);
if ( eval != null ) {
// comment to disable variant filtering by confidence score
if ( evalContainsGenotypes ) {
// Genotyping - use best vs. next best lod
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
} else {
// Variation discovery - use best vs. reference lod
if ( Math.abs(eval.getNegLog10PError()) < minConfidenceScore ) eval = null;
}
}
if ( eval != null )
if ( eval.getNegLog10PError() < minConfidenceScore ) eval = null;
// update stats about all of the SNPs
updateAnalysisSet(ALL_SNPS, eval, tracker, ref.getBase(), context);
@ -204,14 +195,14 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
updateAnalysisSet(noveltySet, eval, tracker, ref.getBase(), context);
}
if ( eval instanceof SNPCallFromGenotypes ) {
// are we a population backed call? then update
if ( eval instanceof SNPCallFromGenotypes) {
SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval;
int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes();
//System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls);
final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS;
updateAnalysisSet(s, eval, tracker, ref.getBase(), context);
}
return 1;
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.Variation;
@ -25,7 +24,7 @@ public class VariantMatcher extends BasicVariantAnalysis implements GenotypeAnal
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
String r = null;
AllelicVariant db = (AllelicVariant)tracker.lookup(dbName, null);
Variation db = (Variation)tracker.lookup(dbName, null);
if ( eval != null || db != null ) {
String matchFlag = " ";

View File

@ -138,6 +138,6 @@ public class BasicGenotype implements Genotype {
@Override
public Variation toVariation() {
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
return new BasicVariation(this.getBases(),mRef,this.getBases().length(),mLocation,mNetLog10PError);
return new BasicVariation(this.getBases(),String.valueOf(mRef),this.getBases().length(),mLocation,mNetLog10PError);
}
}

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
@ -14,7 +13,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
public class BasicVariation implements Variation {
protected final String mBases;
protected final char mRef;
protected final String mRef;
protected final int mLength;
protected final GenomeLoc mLocation;
protected final double mConfidence;
@ -23,11 +22,11 @@ public class BasicVariation implements Variation {
* the constructor
*
* @param bases the bases that this variant represents
* @param reference the reference base
* @param reference the reference bases
* @param length are we a single base variant, or a indel/deletion? length is negitive for an indel,
* positive for a indel, and 0 for a substitution
*/
public BasicVariation(String bases, char reference, int length, GenomeLoc location, double confidence) {
public BasicVariation(String bases, String reference, int length, GenomeLoc location, double confidence) {
mBases = bases;
mRef = reference;
mLength = length;
@ -49,7 +48,7 @@ public class BasicVariation implements Variation {
@Override
public boolean isSNP() {
if (mLength == 0 && Utils.dupString(mRef, 2).equals(mBases)) return true;
if (mLength == 0) return true;
return false;
}
@ -74,7 +73,7 @@ public class BasicVariation implements Variation {
}
@Override
public char getReference() {
public String getReference() {
return (mRef);
}
@ -85,8 +84,12 @@ public class BasicVariation implements Variation {
@Override
public boolean isReference() {
if (mLength == 0 && mBases.charAt(0) == mRef && mRef != mBases.charAt(1)) return true;
return false;
if (mLength != 0) return true;
int refIndex = 0;
for (char c : mBases.toCharArray()) {
if (mRef.charAt(refIndex) != c) return false;
}
return true;
}
/**
@ -108,10 +111,23 @@ public class BasicVariation implements Variation {
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
}
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
*
* @return a char, representing the alternate base
*/
@Override
public char getReferenceForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(0);
return getAlternateBases().charAt(1);
}
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.Utils;
@ -52,4 +52,13 @@ public enum DiploidGenotype {
default: return true;
}
}
/**
* create a diploid genotype, given a character to make into a hom genotype
* @param hom the character to turn into a hom genotype, i.e. if it is A, then returned will be AA
* @return the diploid genotype
*/
public static DiploidGenotype createGenotype(char hom) {
return DiploidGenotype.valueOf((String.valueOf(hom) + String.valueOf(hom)).toUpperCase());
}
}

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
/**
* @author aaron

View File

@ -57,11 +57,11 @@ public class LikelihoodObject {
// possible types of likihoods to store
public enum LIKELIHOOD_TYPE {
NEGITIVE_LOG, LOG, RAW;
NEGATIVE_LOG, LOG, RAW;
}
// our liklihood storage type
protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG;
protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGATIVE_LOG;
// default the bestGenotype likelihoods to the allele AA
protected GENOTYPE bestGenotype = GENOTYPE.AA;
@ -199,7 +199,7 @@ public class LikelihoodObject {
double[] ft = toDoubleArray();
float[] db = new float[ft.length];
int index = 0;
if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGATIVE_LOG) {
for (; index < ft.length; index++) {
db[index] = ((float) ft[index] * -1.0f);
}
@ -229,9 +229,9 @@ public class LikelihoodObject {
public void validateScore(double score) {
int x = 0;
switch (mLikelihoodType) {
case NEGITIVE_LOG:
case NEGATIVE_LOG:
if (score < 0)
throw new StingException("Likelikhood score of " + score + " is invalid, for NEGITIVE_LOG it must be greater than or equal to 0");
throw new StingException("Likelikhood score of " + score + " is invalid, for NEGATIVE_LOG it must be greater than or equal to 0");
break;
case LOG:
if (score > 0)
@ -256,7 +256,7 @@ public class LikelihoodObject {
return;
if (mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
double mult = 1.0;
if (likelihood == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
if (likelihood == LIKELIHOOD_TYPE.NEGATIVE_LOG) {
mult = -1.0;
}
// one of us in log, the other negitive log, it doesn't matter which
@ -265,7 +265,7 @@ public class LikelihoodObject {
}
} else if (likelihood == LIKELIHOOD_TYPE.RAW) {
double mult = 1.0;
if (mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
if (mLikelihoodType == LIKELIHOOD_TYPE.NEGATIVE_LOG) {
mult = -1.0;
}
// one of us in log, the other negitive log, it doesn't matter which

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
/**
* @author aaron
@ -12,7 +12,21 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
public interface VariantBackedByGenotype {
/**
* get the likelihoods
*
* @param x the genotype
*
* @return an array in lexigraphical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x);
/**
* do we have the specified genotype? not all backedByGenotypes
* have all the genotype data.
*
* @param x the genotype
*
* @return true if available, false otherwise
*/
public boolean hasGenotype(DiploidGenotype x);
}

View File

@ -58,7 +58,7 @@ public interface Variation {
*
* @return the reference base or bases, as a string
*/
public char getReference();
public String getReference();
/**
* get the -1 * (log 10 of the error value)
@ -80,16 +80,22 @@ public interface Variation {
public String getAlternateBases();
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false it is.
* are we an insertion or a deletion? yes, then return true. No? Well, false then.
* @return true if we're an insertion or deletion
*/
public boolean isIndel();
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
* of
* @return a char, representing the alternate base
*/
public char getAlternativeBaseForSNP();
/**
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
* @return a char, representing the alternate base
*/
public char getReferenceForSNP();
}

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;

View File

@ -6,7 +6,6 @@ import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.DataOutputStream;
@ -121,7 +120,7 @@ public class GLFWriter implements GenotypeWriter {
Arrays.fill(values,-255.0);
obj = new LikelihoodObject(values, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
}
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods
double rms = -1;
// calculate the RMS mapping qualities and the read depth

View File

@ -184,7 +184,7 @@ public class RodGLFTest extends BaseTest {
for (int x = 0; x < vals.size(); x++) {
ret[x] = vals.get(x);
}
return new LikelihoodObject(ret, LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
return new LikelihoodObject(ret, LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG);
}

View File

@ -1,15 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalData;
import org.junit.Before;
import org.junit.Test;
import org.junit.Assert;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
public class GenotypeLikelihoodsTest extends BaseTest {
private final static double DELTA = 1e-8;

View File

@ -0,0 +1,104 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.BaseTest;
import org.junit.Assert;
import org.junit.Test;
/**
*
* @author aaron
*
* Class DiploidGenotypeTest
*
* Testing the basic functionality of the diploid genotype class
*/
public class DiploidGenotypeTest extends BaseTest {
@Test
public void testCreateDiploidFromString() {
String genotype = "AA";
DiploidGenotype g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "AC";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "AG";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "AT";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "CC";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "CG";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "CT";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "GG";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "GT";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
genotype = "TT";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(genotype.equals(g.toString()));
}
@Test
public void testIsHet() {
String genotype = "AG";
DiploidGenotype g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(g.isHet());
genotype = "AA";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(!g.isHet());
}
@Test
public void testIsHom() {
String genotype = "AA";
DiploidGenotype g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(g.isHom());
genotype = "AG";
g = DiploidGenotype.valueOf(genotype);
Assert.assertTrue(!g.isHom());
}
@Test
public void testCreateGenotype() {
char ref = 'A';
DiploidGenotype g = DiploidGenotype.createGenotype(ref);
Assert.assertTrue("AA".equals(g.toString()));
ref = 'a';
g = DiploidGenotype.createGenotype(ref);
Assert.assertTrue("AA".equals(g.toString()));
ref = 't';
g = DiploidGenotype.createGenotype(ref);
Assert.assertTrue("TT".equals(g.toString()));
ref = 'T';
g = DiploidGenotype.createGenotype(ref);
Assert.assertTrue("TT".equals(g.toString()));
}
}

View File

@ -61,7 +61,7 @@ public class LikelihoodObjectTest extends BaseTest {
for (int x = 0; x < 10; x++) {
ray[x] = ( x * 25 );
}
mLO = new LikelihoodObject(ray,LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
mLO = new LikelihoodObject(ray,LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG);
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
int index = 0;
@ -77,7 +77,7 @@ public class LikelihoodObjectTest extends BaseTest {
for (int x = 0; x < 10; x++) {
ray[x] = ( x * 25.0 );
}
mLO = new LikelihoodObject(ray,LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
mLO = new LikelihoodObject(ray,LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG);
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
int index = 0;
@ -104,7 +104,7 @@ public class LikelihoodObjectTest extends BaseTest {
ray[x] = ( 240.0 );
}
ray [5] = 0;
mLO = new LikelihoodObject(ray, LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
mLO = new LikelihoodObject(ray, LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG);
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
short smallest = (short)mLO.getBestLikelihood();
assertTrue(smallest == 0);