- Cleaned up annotation code some more

- Use QualityUtils when phred-scaling now



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2217 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-02 17:45:29 +00:00
parent 7055a3ea2d
commit a5dfc9107d
16 changed files with 40 additions and 43 deletions

View File

@ -11,7 +11,7 @@ import java.util.ArrayList;
public class AlleleBalance extends StandardVariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
if ( genotypes.size() == 0 )
return null;
@ -40,7 +40,7 @@ public class AlleleBalance extends StandardVariantAnnotation {
ratio = computeSingleBalance(ref.getBase(), genotypeStr, bases);
}
return new Pair<String, String>(getKeyName(), String.format("%.2f", ratio));
return String.format("%.2f", ratio);
}
public String getKeyName() { return "AB"; }

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
@ -11,9 +10,9 @@ import java.util.List;
public class DepthOfCoverage extends StandardVariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
int depth = pileup.getReads().size();
return new Pair<String, String>(getKeyName(), String.format("%d", depth));
return String.format("%d", depth);
}
public String getKeyName() { return "DP"; }

View File

@ -1,8 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
@ -14,7 +13,7 @@ import java.util.List;
public class FisherStrand implements VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
// this test doesn't make sense for homs
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);
@ -33,7 +32,7 @@ public class FisherStrand implements VariantAnnotation {
return null;
// use Math.abs to prevent -0's
return new Pair<String, String>(getKeyName(), String.format("%.1f", Math.abs(10.0 * Math.log10(pvalue))));
return String.format("%.1f", Math.abs(QualityUtils.phredScaleErrorRate(pvalue)));
}
public String getKeyName() { return "FisherStrand"; }

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Genotype;
@ -12,13 +11,13 @@ import java.util.List;
public class HomopolymerRun extends StandardVariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
if ( !variation.isBiallelic() || !variation.isSNP() )
return null;
int run = computeHomopolymerRun(variation.getAlternativeBaseForSNP(), ref);
return new Pair<String, String>(getKeyName(), String.format("%d", run));
return String.format("%d", run);
}
public String getKeyName() { return "HRun"; }

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
@ -12,14 +11,14 @@ import java.util.List;
public class MappingQualityZero extends StandardVariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
List<SAMRecord> reads = pileup.getReads();
int MQ0Count = 0;
for (int i=0; i < reads.size(); i++) {
if ( reads.get(i).getMappingQuality() == 0 )
MQ0Count++;
}
return new Pair<String, String>(getKeyName(), String.format("%d", MQ0Count));
return String.format("%d", MQ0Count);
}
public String getKeyName() { return "MQ0"; }

View File

@ -28,7 +28,7 @@ public class PrimaryBaseSecondaryBaseSymmetry implements VariantAnnotation{
public boolean useZeroQualityReads() { return USE_ZERO_MAPQ_READS; }
public Pair<String,String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
// todo -- this code doesn't work, should't be called
if ( true )
return null;
@ -42,8 +42,7 @@ public class PrimaryBaseSecondaryBaseSymmetry implements VariantAnnotation{
} else {
//System.out.printf("refSecondBasePair = %s, nonrefPrimaryBasePair = %s%n", refSecondBasePair, nonrefPrimaryBasePair);
double primary_secondary_stat = refSecondBasePair.second - nonrefPrimaryBasePair.second;
String annotation = String.format("%f", primary_secondary_stat);
return new Pair<String,String>(KEY_NAME, annotation);
return String.format("%f", primary_secondary_stat);
}
} else {
return null;

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
@ -13,13 +12,13 @@ import java.util.List;
public class RMSMappingQuality extends StandardVariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
List<SAMRecord> reads = pileup.getReads();
int[] qualities = new int[reads.size()];
for (int i=0; i < reads.size(); i++)
qualities[i] = reads.get(i).getMappingQuality();
double rms = MathUtils.rms(qualities);
return new Pair<String, String>(getKeyName(), String.format("%.2f", rms));
return String.format("%.2f", rms);
}
public String getKeyName() { return "MQ"; }

View File

@ -12,7 +12,7 @@ import java.util.ArrayList;
public class RankSumTest implements VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
if ( genotypes.size() == 0 )
return null;
@ -40,7 +40,7 @@ public class RankSumTest implements VariantAnnotation {
if ( MathUtils.compareDoubles(pvalue, 0.0) == 0 )
return null;
return new Pair<String, String>(getKeyName(), String.format("%.1f", -10.0 * Math.log10(pvalue)));
return String.format("%.1f", QualityUtils.phredScaleErrorRate(pvalue));
}
public String getKeyName() { return "RankSum"; }

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.Genotype;
@ -22,7 +21,7 @@ public class ResidualQuality implements VariantAnnotation{
public boolean useZeroQualityReads() { return true; } // for robustness
public Pair<String,String> annotate( ReferenceContext ref, ReadBackedPileup p, Variation variation, List<Genotype> genotypes) {
public String annotate( ReferenceContext ref, ReadBackedPileup p, Variation variation, List<Genotype> genotypes) {
Character snp = getSNPChar(ref, genotypes);
if ( snp == null ) {
@ -35,7 +34,7 @@ public class ResidualQuality implements VariantAnnotation{
return null;
}
return new Pair<String,String>(KEY_NAME, String.format("%f", logResidQual ));
return String.format("%f", logResidQual);
}
public String getKeyName() { return KEY_NAME; }

View File

@ -30,7 +30,7 @@ public class SecondBaseSkew implements VariantAnnotation {
public String getDescription() { return KEY_NAME + ",1,Float,\"Chi-square Secondary Base Skew\""; }
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
if ( variation.isSNP() && variation.isBiallelic() ) {
char snp = variation.getAlternativeBaseForSNP();
// try {
@ -50,7 +50,7 @@ public class SecondBaseSkew implements VariantAnnotation {
// System.out.println("p_transformed="+p_transformed+" e_transformed="+expected_transformed+" variantDepth="+depthProp.getFirst());
// System.out.println("Proportion variant bases with ref 2bb="+depthProp.getSecond()+" Expected="+proportionExpectations[0]);
double chi_square = Math.signum(depthProp.getSecond() - proportionExpectations[0])*Math.min(Math.pow(p_transformed - expected_transformed, 2), Double.MAX_VALUE);
return new Pair<String,String>(KEY_NAME, String.format("%f", chi_square));
return String.format("%f", chi_square);
}
} else {
return null;

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
@ -11,9 +10,9 @@ import java.util.List;
public class SpanningDeletions extends StandardVariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
int deletions = pileup.getNumberOfDeletions();
return new Pair<String, String>(getKeyName(), String.format("%.2f", (double)deletions/(double)pileup.size()));
return String.format("%.2f", (double)deletions/(double)pileup.size());
}
public String getKeyName() { return "Dels"; }

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
@ -10,9 +9,16 @@ import java.util.List;
public interface VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes);
// return the annotation for the given locus data (return null for no annotation)
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes);
// return true if you want to use a context with mapping quality zero reads, false otherwise
public boolean useZeroQualityReads();
// return the INFO key
public String getKeyName();
// return the description used for the VCF INFO meta field
public String getDescription();
}

View File

@ -191,10 +191,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
HashMap<String, String> results = new HashMap<String, String>();
for ( VariantAnnotation annotator : annotations) {
Pair<String, String> annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation, genotypes);
String annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation, genotypes);
if ( annot != null ) {
// System.out.println("Annotating: First="+annot.getFirst()+" Second="+annot.getSecond());
results.put(annot.first, annot.second);
results.put(annotator.getKeyName(), annot);
}
}

View File

@ -37,9 +37,9 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
double phredScaledConfidence;
boolean bestIsRef = false;
if ( PofD > PofNull ) {
phredScaledConfidence = -10.0 * Math.log10(PofNull / sum);
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofNull / sum);
} else {
phredScaledConfidence = -10.0 * Math.log10(PofD / sum);
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofD / sum);
bestIsRef = true;
}

View File

@ -280,7 +280,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
char base = Character.toLowerCase(altAllele);
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( MathUtils.compareDoubles(PofFs[baseIndex], 0.0) != 0 ) {
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[baseIndex][0]);
double phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[baseIndex][0]);
if ( Double.isInfinite(phredScaledConfidence) ) {
phredScaledConfidence = -10.0 * log10PofDgivenAFi[baseIndex][0];
verboseWriter.println("P(f>0)_" + base + " = 1");
@ -288,7 +288,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
verboseWriter.println("LOD_" + base + " = " + phredScaledConfidence);
} else {
verboseWriter.println("P(f>0)_" + base + " = " + Math.log10(PofFs[baseIndex]));
verboseWriter.println("Qscore_" + base + " = " + (-10.0 * Math.log10(alleleFrequencyPosteriors[baseIndex][0])));
verboseWriter.println("Qscore_" + base + " = " + (QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[baseIndex][0])));
verboseWriter.println("LOD_" + base + " = " + (Math.log10(PofFs[baseIndex]) - Math.log10(alleleFrequencyPosteriors[baseIndex][0])));
}
}
@ -306,7 +306,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// only need to look at the most likely alternate allele
int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele);
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[indexOfMax][0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10PofDgivenAFi[indexOfMax][0];
int bestAFguess = Utils.findIndexOfMaxEntry(alleleFrequencyPosteriors[indexOfMax]);

View File

@ -58,12 +58,12 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// calculate the phred-scaled confidence score
double phredScaledConfidence;
if ( GENOTYPE_MODE ) {
phredScaledConfidence = -10.0 * Math.log10(1.0 - posteriors[bestIndex]);
phredScaledConfidence = QualityUtils.phredScaleErrorRate(1.0 - posteriors[bestIndex]);
} else {
int refIndex = DiploidGenotype.createHomGenotype(ref).ordinal();
bestIsRef = (refIndex == bestIndex);
double pError = (bestIsRef ? 1.0 - posteriors[refIndex] : posteriors[refIndex]);
phredScaledConfidence = -10.0 * Math.log10(pError);
phredScaledConfidence = QualityUtils.phredScaleErrorRate(pError);
}
// are we above the lod threshold for emitting calls (and not in all-bases mode)?