Output changes for VCF and UG:
1. Don't cap q-scores at 99.
2. Scale SLOD to allow more resolution in the output.
3. UG outputs weighted allele balance (AB) and on-off genotype (OO) info fields for het genotype calls (works for the joint estimation model and SSG).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2011 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1e7ddd2d9f
commit
6a37090529
|
|
@ -66,6 +66,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||
logger.debug(String.format("SLOD=%f", strandScore));
|
||||
// rescale by a factor of 10
|
||||
strandScore *= 10.0;
|
||||
|
||||
((SLODBacked)locusdata).setSLOD(strandScore);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -333,6 +333,61 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
if ( locusdata instanceof AlleleFrequencyBacked ) {
|
||||
((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess);
|
||||
}
|
||||
if ( locusdata instanceof AlleleBalanceBacked ) {
|
||||
ArrayList<Double> refBalances = new ArrayList<Double>();
|
||||
ArrayList<Double> onOffBalances = new ArrayList<Double>();
|
||||
ArrayList<Double> weights = new ArrayList<Double>();
|
||||
|
||||
// accumulate ratios and weights
|
||||
for ( java.util.Map.Entry<String, GenotypeLikelihoods> entry : GLs.entrySet() ) {
|
||||
// determine the best genotype
|
||||
Integer sorted[] = Utils.SortPermutation(entry.getValue().getPosteriors());
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
|
||||
// we care only about het calls
|
||||
if ( bestGenotype.isHetRef(ref) ) {
|
||||
|
||||
// make sure the alt base is our target alt
|
||||
char altBase = bestGenotype.base1 != ref ? bestGenotype.base1 : bestGenotype.base2;
|
||||
if ( altBase != baseOfMax )
|
||||
continue;
|
||||
|
||||
// get the base counts at this pileup (minus deletions)
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(entry.getKey()).getContext(StratifiedContext.OVERALL));
|
||||
int[] counts = pileup.getBasePileupAsCounts();
|
||||
int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||
int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)];
|
||||
int totalCount = 0;
|
||||
for (int i = 0; i < counts.length; i++)
|
||||
totalCount += counts[i];
|
||||
|
||||
// add the entries
|
||||
weights.add(entry.getValue().getNormalizedPosteriors()[bestGenotype.ordinal()]);
|
||||
refBalances.add((double)refCount / (double)(refCount + altCount));
|
||||
onOffBalances.add((double)(refCount + altCount) / (double)totalCount);
|
||||
}
|
||||
}
|
||||
|
||||
if ( weights.size() > 0 ) {
|
||||
// normalize the weights
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
sum += weights.get(i);
|
||||
for (int i = 0; i < weights.size(); i++)
|
||||
weights.set(i, weights.get(i) / sum);
|
||||
|
||||
// calculate total weighted ratios
|
||||
double normalizedRefRatio = 0.0;
|
||||
double normalizedOnOffRatio = 0.0;
|
||||
for (int i = 0; i < weights.size(); i++) {
|
||||
normalizedRefRatio += weights.get(i) * refBalances.get(i);
|
||||
normalizedOnOffRatio += weights.get(i) * onOffBalances.get(i);
|
||||
}
|
||||
|
||||
((AlleleBalanceBacked)locusdata).setRefRatio(normalizedRefRatio);
|
||||
((AlleleBalanceBacked)locusdata).setOnOffRatio(normalizedOnOffRatio);
|
||||
}
|
||||
}
|
||||
if ( locusdata instanceof SLODBacked ) {
|
||||
// the overall lod
|
||||
double overallLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
|
|
@ -354,7 +409,11 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofNull;
|
||||
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofNull;
|
||||
//logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
|
||||
// strand score is max bias between forward and reverse strands
|
||||
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||
// rescale by a factor of 10
|
||||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
||||
((SLODBacked)locusdata).setSLOD(strandScore);
|
||||
|
|
|
|||
|
|
@ -90,6 +90,27 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
if ( locusdata instanceof ConfidenceBacked ) {
|
||||
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
|
||||
}
|
||||
if ( locusdata instanceof AlleleBalanceBacked ) {
|
||||
|
||||
DiploidGenotype bestGenotype = DiploidGenotype.values()[bestIndex];
|
||||
// we care only about het calls
|
||||
if ( bestGenotype.isHetRef(ref) ) {
|
||||
|
||||
char altBase = bestGenotype.base1 != ref ? bestGenotype.base1 : bestGenotype.base2;
|
||||
|
||||
// get the base counts at this pileup (minus deletions)
|
||||
int[] counts = discoveryGL.first.getBasePileupAsCounts();
|
||||
int refCount = counts[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||
int altCount = counts[BaseUtils.simpleBaseToBaseIndex(altBase)];
|
||||
int totalCount = 0;
|
||||
for (int i = 0; i < counts.length; i++)
|
||||
totalCount += counts[i];
|
||||
|
||||
// set the ratios
|
||||
((AlleleBalanceBacked)locusdata).setRefRatio((double)refCount / (double)(refCount + altCount));
|
||||
((AlleleBalanceBacked)locusdata).setOnOffRatio((double)(refCount + altCount) / (double)totalCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<List<Genotype>, GenotypeLocusData>(Arrays.asList(call), locusdata);
|
||||
|
|
|
|||
|
|
@ -80,24 +80,28 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
return secondaryQualPileupAsString(reads, offsets);
|
||||
}
|
||||
|
||||
public String getBasePileupAsCountsString() {
|
||||
String bases = basePileupAsString(reads, offsets, includeDeletions);
|
||||
public int[] getBasePileupAsCounts() {
|
||||
int[] counts = new int[4];
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
// skip deletion sites
|
||||
if ( offsets.get(i) == -1 )
|
||||
continue;
|
||||
char base = Character.toUpperCase((char)(reads.get(i).getReadBases()[offsets.get(i)]));
|
||||
if (BaseUtils.simpleBaseToBaseIndex(base) == -1)
|
||||
continue;
|
||||
counts[BaseUtils.simpleBaseToBaseIndex(base)]++;
|
||||
}
|
||||
|
||||
int[] counts = new int[4];
|
||||
for (int i = 0; i < reads.size(); i++)
|
||||
{
|
||||
// skip deletion sites
|
||||
if ( offsets.get(i) == -1 )
|
||||
continue;
|
||||
char base = Character.toUpperCase((char)(reads.get(i).getReadBases()[offsets.get(i)]));
|
||||
if (BaseUtils.simpleBaseToBaseIndex(base) == -1) { continue; }
|
||||
counts[BaseUtils.simpleBaseToBaseIndex(base)]++;
|
||||
}
|
||||
return String.format("A[%d] C[%d] G[%d] T[%d]",
|
||||
counts[0],
|
||||
counts[1],
|
||||
counts[2],
|
||||
counts[3]);
|
||||
return counts;
|
||||
}
|
||||
|
||||
public String getBasePileupAsCountsString() {
|
||||
int[] counts = getBasePileupAsCounts();
|
||||
return String.format("A[%d] C[%d] G[%d] T[%d]",
|
||||
counts[0],
|
||||
counts[1],
|
||||
counts[2],
|
||||
counts[3]);
|
||||
}
|
||||
|
||||
public String getProbDistPileup() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.utils.genotype;

/**
 * Interface AlleleBalanceBacked
 *
 * Marks a genotype as carrying allele-balance information for het
 * calls: the ref/(ref+alt) base ratio and the on/off-genotype
 * (ref+alt)/total base ratio.
 *
 * @author ebanks
 */
public interface AlleleBalanceBacked {

    /**
     * Gets the allele-balance ratio.
     *
     * @return the ratio of ref/(ref+alt) bases for hets
     */
    public double getRefRatio();

    /**
     * Sets the allele-balance ratio.
     *
     * @param ratio the ref/(ref+alt) ratio
     */
    public void setRefRatio(double ratio);

    /**
     * Gets the on/off-genotype ratio.
     *
     * @return the ratio of (ref+alt)/total bases for hets
     */
    public double getOnOffRatio();

    /**
     * Sets the on/off-genotype ratio.
     *
     * @param ratio the (ref+alt)/total ratio
     */
    public void setOnOffRatio(double ratio);
}
|
||||
|
|
@ -33,7 +33,6 @@ public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked,
|
|||
// the sample name, used to populate the SampleBacked interface
|
||||
private String mSampleName;
|
||||
|
||||
|
||||
public VCFGenotypeCall(char ref, GenomeLoc loc) {
|
||||
mRefBase = ref;
|
||||
mLocation = loc;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import org.broadinstitute.sting.utils.genotype.*;
|
|||
* <p/>
|
||||
* represents the meta data for a genotype object.
|
||||
*/
|
||||
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked {
|
||||
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked, AlleleBalanceBacked {
|
||||
|
||||
// the discovery lod score
|
||||
private double mConfidence = 0.0;
|
||||
|
|
@ -27,6 +27,10 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
|
|||
// the ref base
|
||||
private char mRefBase;
|
||||
|
||||
// the various allele ratios
|
||||
private double mRefRatio = 0.0;
|
||||
private double mOnOffRatio = 0.0;
|
||||
|
||||
/**
|
||||
* create a basic genotype meta data object, given the following fields
|
||||
*
|
||||
|
|
@ -105,4 +109,40 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
|
|||
public void setAlleleFrequency(double frequency) {
|
||||
mAlleleFrequency = frequency;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return returns true if the ref/(ref+alt) ratio for this genotype has been set
|
||||
*/
|
||||
public boolean hasRefRatio() {
|
||||
return mRefRatio > 0.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return returns the ref/(ref+alt) ratio for this genotype
|
||||
*/
|
||||
public double getRefRatio() {
|
||||
return mRefRatio;
|
||||
}
|
||||
|
||||
public void setRefRatio(double ratio) {
|
||||
mRefRatio = ratio;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return returns true if the (ref+alt)/total ratio for this genotype has been set
|
||||
*/
|
||||
public boolean hasOnOffRatio() {
|
||||
return mOnOffRatio > 0.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return returns the (ref+alt)/total ratio for this genotype
|
||||
*/
|
||||
public double getOnOffRatio() {
|
||||
return mOnOffRatio;
|
||||
}
|
||||
|
||||
public void setOnOffRatio(double ratio) {
|
||||
mOnOffRatio = ratio;
|
||||
}
|
||||
}
|
||||
|
|
@ -164,8 +164,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
infoFields.put("DP", String.format("%d", totalReadDepth));
|
||||
|
||||
double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence();
|
||||
// maintain 0-99 based Q-scores
|
||||
qual = Math.min(qual, 99);
|
||||
// min Q-score is zero
|
||||
qual = Math.max(qual, 0);
|
||||
|
||||
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
|
||||
|
|
@ -195,6 +194,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
if ( locusdata != null ) {
|
||||
infoFields.put("SB", String.format("%.2f", locusdata.getSLOD()));
|
||||
infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency()));
|
||||
if ( locusdata.hasRefRatio() )
|
||||
infoFields.put("AB", String.format("%.2f", locusdata.getRefRatio()));
|
||||
if ( locusdata.hasOnOffRatio() )
|
||||
infoFields.put("OO", String.format("%.2f", locusdata.getOnOffRatio()));
|
||||
}
|
||||
infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size()));
|
||||
return infoFields;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("4f14792e79adef415efaf97e946460e3"));
|
||||
Arrays.asList("8f14c1efea00b32984ec52db7a800d1b"));
|
||||
executeTest("testMultiSamplePilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||
Arrays.asList("e3a701fd5f1fe4a84746f17ad4b3b3c3"));
|
||||
Arrays.asList("89c600d72a815c09412c97f82fa2281e"));
|
||||
executeTest("testMultiSamplePilot2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue