Pull out StratifiedAlignmentContext code so other walkers can use it.

This is basically a wrapper class around AlignmentContext which allows you to stratify a context by e.g. reads on forward vs. reverse strands.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2312 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-10 19:21:16 +00:00
parent adb2fdbee7
commit 11ac7885b0
8 changed files with 252 additions and 250 deletions

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
/**
* Useful class for storing different AlignmentContexts
* User: ebanks
*/
public class StratifiedAlignmentContext {
public enum StratifiedContextType { OVERALL, FORWARD, REVERSE }
private AlignmentContext overall = null;
private AlignmentContext forward = null;
private AlignmentContext reverse = null;
private GenomeLoc loc;
private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
public StratifiedAlignmentContext(GenomeLoc loc) {
this.loc = loc;
}
public AlignmentContext getContext(StratifiedContextType context) {
switch ( context ) {
case OVERALL: return getOverallContext();
case FORWARD: return getForwardContext();
case REVERSE: return getReverseContext();
}
return null;
}
private AlignmentContext getOverallContext() {
if ( overall == null )
overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets));
return overall;
}
private AlignmentContext getForwardContext() {
if ( forward == null )
forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets));
return forward;
}
private AlignmentContext getReverseContext() {
if ( reverse == null )
reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets));
return reverse;
}
public void add(SAMRecord read, int offset) {
if ( read.getReadNegativeStrandFlag() ) {
reverseReads.add(read);
reverseOffsets.add(offset);
} else {
forwardReads.add(read);
forwardOffsets.add(offset);
}
allReads.add(read);
allOffsets.add(offset);
}
/**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
*
* @param context the original AlignmentContext
* @param assumedSingleSample if not null, any read without a readgroup will be given this sample name
* @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group
*
* @return a Map of sample name to StratifiedAlignmentContext
*
**/
public static Map<String, StratifiedAlignmentContext> splitContextBySample(AlignmentContext context, String assumedSingleSample, String collapseToThisSample) {
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
ReadBackedPileup pileup = context.getPileup();
for (PileupElement p : pileup ) {
// get the read
SAMRecord read = p.getRead();
// find the sample
String sample;
if ( collapseToThisSample != null ) {
sample = collapseToThisSample;
} else {
SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup == null ) {
if ( assumedSingleSample == null )
throw new StingException("Missing read group for read " + read.getReadName());
sample = assumedSingleSample;
} else {
sample = readGroup.getSample();
}
}
// create a new context object if this is the first time we're seeing a read for this sample
StratifiedAlignmentContext myContext = contexts.get(sample);
if ( myContext == null ) {
myContext = new StratifiedAlignmentContext(context.getLocation());
contexts.put(sample, myContext);
}
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
myContext.add(read, p.getOffset());
}
return contexts;
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import java.util.*;
@ -16,7 +17,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
private enum GenotypeType { REF, HET, HOM }
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
// initialize the GenotypeLikelihoods
GLs.clear();
AFMatrixMap.clear();
@ -32,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
int index = 0;
for ( String sample : contexts.keySet() ) {
AlignmentContextBySample context = contexts.get(sample);
StratifiedAlignmentContext context = contexts.get(sample);
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
// create the GenotypeLikelihoods object
@ -55,7 +56,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
}
}
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt);
@ -73,7 +74,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
}
}
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
protected List<Genotype> makeGenotypeCalls(char ref, char alt, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
ArrayList<Genotype> calls = new ArrayList<Genotype>();
for ( String sample : GLs.keySet() ) {
@ -82,7 +83,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.utils.*;
@ -19,15 +19,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
protected EMGenotypeCalculationModel() {}
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the context for each sample, overall and separated by strand
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
if ( contexts == null )
return null;
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
// run the EM calculation
EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL);
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
double PofD = Math.pow(10, overall.getPofD());
double PofNull = Math.pow(10, overall.getPofNull());
@ -50,7 +45,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
// generate the calls
List<Genotype> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts);
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
@ -67,8 +62,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
double lod = overall.getPofD() - overall.getPofNull();
logger.debug(String.format("LOD=%f", lod));
EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);
EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE);
EMOutput forward = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
EMOutput reverse = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
@ -89,7 +84,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
return new Pair<VariationCall, List<Genotype>>(locusdata, calls);
}
protected List<Genotype> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
protected List<Genotype> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, Map<String, StratifiedAlignmentContext> contexts) {
HashMap<String, GenotypeLikelihoods> GLs = results.getGenotypeLikelihoods();
ArrayList<Genotype> calls = new ArrayList<Genotype>();
@ -98,11 +93,11 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
for ( String sample : GLs.keySet() ) {
// create the call
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL);
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {
@ -129,7 +124,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
return calls;
}
public EMOutput runEM(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
public EMOutput runEM(char ref, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
// initialize the allele frequencies
initializeAlleleFrequencies(contexts.size(), ref);
@ -157,7 +152,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
}
protected abstract void initializeAlleleFrequencies(int numSamplesInContext, char ref);
protected abstract void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType);
protected abstract void initializeGenotypeLikelihoods(char ref, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType);
protected abstract void calculateAlleleFrequencyPosteriors();
protected abstract void applyAlleleFrequencyToGenotypeLikelihoods();
protected abstract boolean isStable();

View File

@ -1,14 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.apache.log4j.Logger;
import java.io.*;
@ -41,8 +37,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY;
protected boolean REPORT_SLOD;
protected double maxDeletionFractionInPileup;
protected String assumedSingleSample;
protected PrintWriter verboseWriter;
/**
@ -72,11 +66,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
POOL_SIZE = UAC.POOLSIZE;
CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD;
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
maxDeletionFractionInPileup = UAC.MAX_DELETION_FRACTION;
// disable if that's what is requested
if ( maxDeletionFractionInPileup < 0.0 || maxDeletionFractionInPileup > 1.0 )
maxDeletionFractionInPileup = -1.0;
assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE;
if ( UAC.VERBOSE != null ) {
try {
verboseWriter = new PrintWriter(UAC.VERBOSE);
@ -97,16 +86,18 @@ public abstract class GenotypeCalculationModel implements Cloneable {
/**
* Must be overridden by concrete subclasses
* @param tracker rod data
* @param ref reference base
* @param context alignment context
* @param priors priors to use for GL
* @param tracker rod data
* @param ref reference base
* @param loc GenomeLoc
* @param stratifiedContexts stratified alignment contexts
* @param priors priors to use for GL
*
* @return calls
*/
public abstract Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker,
char ref,
AlignmentContext context,
GenomeLoc loc,
Map<String, StratifiedAlignmentContext> stratifiedContexts,
DiploidGenotypePriors priors);
/**
@ -128,119 +119,4 @@ public abstract class GenotypeCalculationModel implements Cloneable {
private static boolean isHapmapSite(RefMetaDataTracker tracker) {
return tracker.getTrackData("hapmap", null) != null;
}
/**
* Create the mapping from sample to alignment contexts.
* @param context original alignment context
*
* @return stratified contexts, or null if there are too many deletions at this position
*/
protected HashMap<String, AlignmentContextBySample> splitContextBySample(AlignmentContext context) {
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
ReadBackedPileup pileup = context.getPileup();
for (PileupElement p : pileup ) {
// get the read and offset
SAMRecord read = p.getRead();
// find the sample
String sample;
SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup == null ) {
if ( assumedSingleSample == null )
throw new StingException("Missing read group for read " + read.getReadName());
sample = assumedSingleSample;
} else {
sample = readGroup.getSample();
}
// create a new context object if this is the first time we're seeing a read for this sample
AlignmentContextBySample myContext = contexts.get(sample);
if ( myContext == null ) {
myContext = new AlignmentContextBySample(context.getLocation());
contexts.put(sample, myContext);
}
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
myContext.add(read, p.getOffset());
}
// are there too many deletions in the pileup?
if ( maxDeletionFractionInPileup != -1.0 &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > maxDeletionFractionInPileup )
return null;
return contexts;
}
/**
* A class to keep track of the alignment context observed for a given sample.
* we currently store the overall context and strand-stratified sets,
* but any arbitrary stratification could be added.
*/
protected enum StratifiedContext { OVERALL, FORWARD, REVERSE }
protected class AlignmentContextBySample {
private AlignmentContext overall = null;
private AlignmentContext forward = null;
private AlignmentContext reverse = null;
private GenomeLoc loc;
private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
AlignmentContextBySample(GenomeLoc loc) {
this.loc = loc;
}
public AlignmentContext getContext(StratifiedContext context) {
switch ( context ) {
case OVERALL: return getOverallContext();
case FORWARD: return getForwardContext();
case REVERSE: return getReverseContext();
}
return null;
}
private AlignmentContext getOverallContext() {
if ( overall == null )
overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets));
return overall;
}
private AlignmentContext getForwardContext() {
if ( forward == null )
forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets));
return forward;
}
private AlignmentContext getReverseContext() {
if ( reverse == null )
reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets));
return reverse;
}
public void add(SAMRecord read, int offset) {
if ( read.getReadNegativeStrandFlag() ) {
reverseReads.add(read);
reverseOffsets.add(offset);
} else {
forwardReads.add(read);
forwardOffsets.add(offset);
}
allReads.add(read);
allOffsets.add(offset);
}
}
}

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.Variation.VARIANT_TYPE;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.*;
import java.util.*;
import java.io.PrintWriter;
@ -35,18 +35,13 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
protected JointEstimateGenotypeCalculationModel() {}
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the context for each sample, overall and separated by strand
HashMap<String, AlignmentContextBySample> contexts = createContexts(context);
if ( contexts == null )
return null;
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
int numSamples = getNSamples(contexts);
int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero)
// find the alternate allele with the largest sum of quality scores
initializeBestAlternateAllele(ref, context);
initializeBestAlternateAllele(ref, contexts);
// if there are no non-ref bases...
if ( bestAlternateAllele == null ) {
@ -60,38 +55,38 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
initializeAlleleFrequencies(frequencyEstimationPoints);
initialize(ref, contexts, StratifiedContext.OVERALL);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL);
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
calculatePofFs(ref, frequencyEstimationPoints);
// print out stats if we have a writer
if ( verboseWriter != null )
printAlleleFrequencyData(ref, context.getLocation(), frequencyEstimationPoints);
printAlleleFrequencyData(ref, loc, frequencyEstimationPoints);
return createCalls(tracker, ref, contexts, context.getLocation(), frequencyEstimationPoints);
return createCalls(tracker, ref, contexts, loc, frequencyEstimationPoints);
}
protected HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context) {
return splitContextBySample(context);
}
protected int getNSamples(HashMap<String, AlignmentContextBySample> contexts) {
protected int getNSamples(Map<String, StratifiedAlignmentContext> contexts) {
return contexts.size();
}
protected void initializeBestAlternateAllele(char ref, AlignmentContext context) {
protected void initializeBestAlternateAllele(char ref, Map<String, StratifiedAlignmentContext> contexts) {
int[] qualCounts = new int[4];
// calculate the sum of quality scores for each base
ReadBackedPileup pileup = context.getPileup();
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
for ( String sample : contexts.keySet() ) {
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL);
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
if ( index >= 0 )
qualCounts[index] += p.getQual();
// calculate the sum of quality scores for each base
ReadBackedPileup pileup = context.getPileup();
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
continue;
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
if ( index >= 0 )
qualCounts[index] += p.getQual();
}
}
// set the non-ref base with maximum quality score sum
@ -118,7 +113,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
verboseWriter.println(header);
}
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
// by default, no initialization is done
return;
}
@ -160,7 +155,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return AFs;
}
protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
// initialization
for ( char altAllele : BaseUtils.BASES ) {
@ -185,7 +180,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
* @param contexts stratified alignment contexts
* @param contextType which stratification to use
*/
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt);
// for each minor allele frequency, calculate log10PofDgivenAFi
@ -204,7 +199,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
*
* @return value of PofDgivenAF for allele frequency f
*/
protected double calculateLog10PofDgivenAFforF(char ref, char alt, double f, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected double calculateLog10PofDgivenAFforF(char ref, char alt, double f, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
return 0.0;
}
@ -304,12 +299,12 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
verboseWriter.println();
}
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
protected List<Genotype> makeGenotypeCalls(char ref, char alt, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
// by default, we return no genotypes
return new ArrayList<Genotype>();
}
protected Pair<VariationCall, List<Genotype>> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
protected Pair<VariationCall, List<Genotype>> createCalls(RefMetaDataTracker tracker, char ref, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
// only need to look at the most likely alternate allele
int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele);
@ -363,8 +358,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( !Double.isInfinite(lod) ) {
// the forward lod
initialize(ref, contexts, StratifiedContext.FORWARD);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.FORWARD);
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
calculatePofFs(ref, frequencyEstimationPoints);
double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double forwardLog10PofF = Math.log10(PofFs[indexOfMax]);
@ -372,8 +367,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
lod = -1.0 * log10PofDgivenAFi[indexOfMax][0];
// the reverse lod
initialize(ref, contexts, StratifiedContext.REVERSE);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.REVERSE);
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE);
calculatePofFs(ref, frequencyEstimationPoints);
double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double reverseLog10PofF = Math.log10(PofFs[indexOfMax]);

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.utils.*;
@ -26,26 +26,21 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// overload this method so we can special-case the single sample
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
// we don't actually want to run EM for single samples
if ( samples.size() == 1 ) {
// split the context (so we can get forward/reverse contexts for free)
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
if ( contexts == null )
return null;
// get the context for the sample
String sample = samples.iterator().next();
AlignmentContextBySample sampleContext = contexts.get(sample);
StratifiedAlignmentContext sampleContext = contexts.get(sample);
// if there were no good bases, the context wouldn't exist
if ( sampleContext == null )
return null;
// get the genotype likelihoods
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedContext.OVERALL);
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
// find the index of the best genotype
double[] posteriors = discoveryGL.second.getNormalizedPosteriors();
@ -71,7 +66,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
return new Pair<VariationCall, List<Genotype>>(null, null);
// we can now create the genotype call object
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
if ( call instanceof ReadBacked ) {
((ReadBacked)call).setPileup(discoveryGL.first);
@ -86,7 +81,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
}
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
@ -104,10 +99,10 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
return new Pair<VariationCall, List<Genotype>>(locusdata, Arrays.asList((Genotype)call));
}
return super.calculateGenotype(tracker, ref, context, priors);
return super.calculateGenotype(tracker, ref, loc, contexts, priors);
}
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) {
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
// create the pileup
AlignmentContext myContext = sampleContext.getContext(contextType);
ReadBackedPileup pileup = myContext.getPileup();
@ -127,13 +122,13 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
logger.debug("Initial allele frequency for " + BaseUtils.baseIndexToSimpleBase(i) + ": " + alleleFrequencies[i]);
}
protected void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
protected void initializeGenotypeLikelihoods(char ref, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
GLs.clear();
DiploidGenotypePriors AFPriors = calculateAlleleFreqBasedPriors(alleleFrequencies);
for ( String sample : contexts.keySet() ) {
AlignmentContextBySample context = contexts.get(sample);
StratifiedAlignmentContext context = contexts.get(sample);
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
// create the GenotypeLikelihoods object

View File

@ -1,6 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.QualityUtils;
@ -10,7 +10,7 @@ import java.util.*;
import net.sf.samtools.SAMRecord;
public class PooledCalculationModel extends JointEstimateGenotypeCalculationModel {
private static final String POOL_SAMPLE_NAME = "POOL";
protected static final String POOL_SAMPLE_NAME = "POOL";
private static FourBaseProbabilities fourBaseLikelihoods = null;
private static boolean USE_CACHE = true;
@ -26,7 +26,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
* @param contexts
* @param contextType
*/
protected void initialize(char ref, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
super.initialize(ref, contexts, contextType);
// todo -- move this code to a static initializer
@ -40,43 +40,13 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
makeCache(POOL_SIZE);
}
protected int getNSamples(HashMap<String, AlignmentContextBySample> contexts) {
protected int getNSamples(Map<String, StratifiedAlignmentContext> contexts) {
return POOL_SIZE;
}
protected HashMap<String, AlignmentContextBySample> createContexts(AlignmentContext context) {
// for testing purposes, we may want to throw multi-samples at pooled mode,
// so we can't use the standard splitContextBySample() method here
AlignmentContextBySample pooledContext = new AlignmentContextBySample(context.getLocation());
int deletionsInPileup = 0;
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
for (int i = 0; i < reads.size(); i++) {
// check for deletions
int offset = offsets.get(i);
if ( offset == -1 )
deletionsInPileup++;
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
pooledContext.add(reads.get(i), offset);
}
// are there too many deletions in the pileup?
if ( maxDeletionFractionInPileup != -1.0 &&
(double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup )
return null;
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
contexts.put(POOL_SAMPLE_NAME, pooledContext);
return contexts;
}
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
StratifiedAlignmentContext context = contexts.get(POOL_SAMPLE_NAME);
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref);

View File

@ -25,22 +25,23 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMReadGroupRecord;import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import net.sf.samtools.SAMReadGroupRecord;
import java.io.File;
import java.util.*;
import java.lang.reflect.Field;
@Reference(window=@Window(start=-20,stop=20))
@ -178,8 +179,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// all of the arguments from the argument collection
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(Collections.<Object>singleton(UAC));
for(Map.Entry<String,String> commandLineArg: commandLineArgs.entrySet())
headerInfo.add(String.format("UG_%s=%s",commandLineArg.getKey(),commandLineArg.getValue()));
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
headerInfo.add(String.format("UG_%s=%s", commandLineArg.getKey(), commandLineArg.getValue()));
return headerInfo;
}
@ -204,8 +205,20 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
return null;
// are there too many deletions in the pileup?
ReadBackedPileup pileup = MQ0freeContext.getPileup();
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
return null;
// stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(MQ0freeContext, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, MQ0freeContext, priors);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, fullContext.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
@ -220,7 +233,11 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
return call;
}
private AlignmentContext filterAlignmentContext(AlignmentContext context) {
private static boolean isValidDeletionFraction(double d) {
return ( d >= 0.0 && d <= 1.0 );
}
private static AlignmentContext filterAlignmentContext(AlignmentContext context) {
return new AlignmentContext(context.getLocation(),
context.getPileup().getPileupWithoutMappingQualityZeroReads(),
0);