diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index b02c58945..c0045d83f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -66,6 +66,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); double strandScore = Math.max(forwardLod - lod, reverseLod - lod); logger.debug(String.format("SLOD=%f", strandScore)); + // rescale by a factor of 10 + strandScore *= 10.0; ((SLODBacked)locusdata).setSLOD(strandScore); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 1d5139de4..a5dedad3c 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -333,6 +333,61 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo if ( locusdata instanceof AlleleFrequencyBacked ) { ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess); } + if ( locusdata instanceof AlleleBalanceBacked ) { + ArrayList refBalances = new ArrayList(); + ArrayList onOffBalances = new ArrayList(); + ArrayList weights = new ArrayList(); + + // accumulate ratios and weights + for ( java.util.Map.Entry entry : GLs.entrySet() ) { + // determine the best genotype + Integer sorted[] = Utils.SortPermutation(entry.getValue().getPosteriors()); + DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + + // we care only about het calls + if ( bestGenotype.isHetRef(ref) ) { + + // make sure the alt base is our target alt + char altBase = bestGenotype.base1 != ref ? bestGenotype.base1 : bestGenotype.base2; + if ( altBase != baseOfMax ) + continue; + + // get the base counts at this pileup (minus deletions) + ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(entry.getKey()).getContext(StratifiedContext.OVERALL)); + int[] counts = pileup.getBasePileupAsCounts(); + int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)]; + int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)]; + int totalCount = 0; + for (int i = 0; i < counts.length; i++) + totalCount += counts[i]; + + // add the entries + weights.add(entry.getValue().getNormalizedPosteriors()[bestGenotype.ordinal()]); + refBalances.add((double)refCount / (double)(refCount + altCount)); + onOffBalances.add((double)(refCount + altCount) / (double)totalCount); + } + } + + if ( weights.size() > 0 ) { + // normalize the weights + double sum = 0.0; + for (int i = 0; i < weights.size(); i++) + sum += weights.get(i); + for (int i = 0; i < weights.size(); i++) + weights.set(i, weights.get(i) / sum); + + // calculate total weighted ratios + double normalizedRefRatio = 0.0; + double normalizedOnOffRatio = 0.0; + for (int i = 0; i < weights.size(); i++) { + normalizedRefRatio += weights.get(i) * refBalances.get(i); + normalizedOnOffRatio += weights.get(i) * onOffBalances.get(i); + } + + ((AlleleBalanceBacked)locusdata).setRefRatio(normalizedRefRatio); + ((AlleleBalanceBacked)locusdata).setOnOffRatio(normalizedOnOffRatio); + } + } if ( locusdata instanceof SLODBacked ) { // the overall lod double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); @@ -354,7 +409,11 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull; double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull; //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + + // strand score is max bias between forward and reverse strands double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + // rescale by a factor of 10 + strandScore *= 10.0; //logger.debug(String.format("SLOD=%f", strandScore)); ((SLODBacked)locusdata).setSLOD(strandScore); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 0dcee8a3c..b6f757964 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -90,6 +90,27 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); } + if ( locusdata instanceof AlleleBalanceBacked ) { + + DiploidGenotype bestGenotype = DiploidGenotype.values()[bestIndex]; + // we care only about het calls + if ( bestGenotype.isHetRef(ref) ) { + + char altBase = bestGenotype.base1 != ref ? bestGenotype.base1 : bestGenotype.base2; + + // get the base counts at this pileup (minus deletions) + int[] counts = discoveryGL.first.getBasePileupAsCounts(); + int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)]; + int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)]; + int totalCount = 0; + for (int i = 0; i < counts.length; i++) + totalCount += counts[i]; + + // set the ratios + ((AlleleBalanceBacked)locusdata).setRefRatio((double)refCount / (double)(refCount + altCount)); + ((AlleleBalanceBacked)locusdata).setOnOffRatio((double)(refCount + altCount) / (double)totalCount); + } + } } return new Pair, GenotypeLocusData>(Arrays.asList(call), locusdata); diff --git a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java index ed56588b1..9e2bde8c3 100755 --- a/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/ReadBackedPileup.java @@ -80,24 +80,28 @@ public class ReadBackedPileup extends BasicPileup { return secondaryQualPileupAsString(reads, offsets); } - public String getBasePileupAsCountsString() { - String bases = basePileupAsString(reads, offsets, includeDeletions); + public int[] getBasePileupAsCounts() { + int[] counts = new int[4]; + for (int i = 0; i < reads.size(); i++) { + // skip deletion sites + if ( offsets.get(i) == -1 ) + continue; + char base = Character.toUpperCase((char)(reads.get(i).getReadBases()[offsets.get(i)])); + if (BaseUtils.simpleBaseToBaseIndex(base) == -1) + continue; + counts[BaseUtils.simpleBaseToBaseIndex(base)]++; + } - int[] counts = new int[4]; - for (int i = 0; i < reads.size(); i++) - { - // skip deletion sites - if ( offsets.get(i) == -1 ) - continue; - char base = Character.toUpperCase((char)(reads.get(i).getReadBases()[offsets.get(i)])); - if (BaseUtils.simpleBaseToBaseIndex(base) == -1) { continue; } - counts[BaseUtils.simpleBaseToBaseIndex(base)]++; - } - return String.format("A[%d] C[%d] G[%d] T[%d]", - counts[0], - counts[1], - counts[2], - counts[3]); + return counts; + } + + public String getBasePileupAsCountsString() { + int[] counts = getBasePileupAsCounts(); + return String.format("A[%d] C[%d] G[%d] T[%d]", + counts[0], + counts[1], + counts[2], + counts[3]); } public String getProbDistPileup() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleBalanceBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleBalanceBacked.java new file mode 100755 index 000000000..404c01857 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/AlleleBalanceBacked.java @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface AlleleBalanceBacked + * + * this interface indicates that the genotype is + * backed up by allele balance information. + */ +public interface AlleleBalanceBacked { + + /** + * + * @return returns the ratio of ref/(ref+alt) bases for hets + */ + public double getRefRatio(); + + /** + * + * @param ratio the ref/(ref+alt) ratio + */ + public void setRefRatio(double ratio); + + /** + * + * @return returns the ratio of (ref+alt)/total bases for hets + */ + public double getOnOffRatio(); + + /** + * + * @param ratio the (ref+alt)/total ratio + */ + public void setOnOffRatio(double ratio); +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index e18ca77b5..6473e2cd5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -33,7 +33,6 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, // the sample name, used to propulate the SampleBacked interface private String mSampleName; - public VCFGenotypeCall(char ref, GenomeLoc loc) { mRefBase = ref; mLocation = loc; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java index f3df77b9e..4fb9e02f1 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java @@ -10,7 +10,7 @@ import org.broadinstitute.sting.utils.genotype.*; *

* represents the meta data for a genotype object. */ -public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked { +public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked, AlleleBalanceBacked { // the discovery lod score private double mConfidence = 0.0; @@ -27,6 +27,10 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked // the ref base private char mRefBase; + // the various allele ratios + private double mRefRatio = 0.0; + private double mOnOffRatio = 0.0; + /** * create a basic genotype meta data pbject, given the following fields * @@ -105,4 +109,40 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked public void setAlleleFrequency(double frequency) { mAlleleFrequency = frequency; } + + /** + * @return returns true if the ref/(ref+alt) ratio for this genotype has been set + */ + public boolean hasRefRatio() { + return mRefRatio > 0.0; + } + + /** + * @return returns the ref/(ref+alt) ratio for this genotype + */ + public double getRefRatio() { + return mRefRatio; + } + + public void setRefRatio(double ratio) { + mRefRatio = ratio; + } + + /** + * @return returns true if the (ref+alt)/total ratio for this genotype has been set + */ + public boolean hasOnOffRatio() { + return mOnOffRatio > 0.0; + } + + /** + * @return returns the (ref+alt)/total ratio for this genotype + */ + public double getOnOffRatio() { + return mOnOffRatio; + } + + public void setOnOffRatio(double ratio) { + mOnOffRatio = ratio; + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 5a2a84bd7..a388adc40 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -164,8 +164,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { infoFields.put("DP", String.format("%d", totalReadDepth)); double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence(); - // maintain 0-99 based Q-scores - qual = Math.min(qual, 99); + // min Q-score is zero qual = Math.max(qual, 0); VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), @@ -195,6 +194,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { if ( locusdata != null ) { infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency())); + if ( locusdata.hasRefRatio() ) + infoFields.put("AB", String.format("%.2f", locusdata.getRefRatio())); + if ( locusdata.hasOnOffRatio() ) + infoFields.put("OO", String.format("%.2f", locusdata.getOnOffRatio())); } infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size())); return infoFields; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 8f1495ad0..48c83274b 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1, - Arrays.asList("4f14792e79adef415efaf97e946460e3")); + Arrays.asList("8f14c1efea00b32984ec52db7a800d1b")); executeTest("testMultiSamplePilot1", spec); } @@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1, - Arrays.asList("e3a701fd5f1fe4a84746f17ad4b3b3c3")); + Arrays.asList("89c600d72a815c09412c97f82fa2281e")); executeTest("testMultiSamplePilot2", spec); }