Generalized the StratifiedAlignmentContext code so that it's easy to add new ways to stratify. Then added an MQ0-free stratification so we don't need to be carrying around 2 different alignment contexts (full vs. mq0-free) anymore.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2314 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-10 19:50:06 +00:00
parent 0c396f04a2
commit 31b1d60d28
7 changed files with 47 additions and 66 deletions

View File

@ -42,63 +42,49 @@ import java.util.Map;
*/
public class StratifiedAlignmentContext {
public enum StratifiedContextType { OVERALL, FORWARD, REVERSE }
// Definitions:
// COMPLETE = full alignment context
// MQ0FREE = full context without MQ0 reads
// FORWARD = reads on forward strand (*no* MQ0 reads)
// REVERSE = reads on forward strand (*no* MQ0 reads)
//
public enum StratifiedContextType { COMPLETE, MQ0FREE, FORWARD, REVERSE }
private AlignmentContext overall = null;
private AlignmentContext forward = null;
private AlignmentContext reverse = null;
private GenomeLoc loc;
private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
private AlignmentContext[] contexts = new AlignmentContext[StratifiedContextType.values().length];
private ArrayList<SAMRecord>[] reads = new ArrayList[StratifiedContextType.values().length];
private ArrayList<Integer>[] offsets = new ArrayList[StratifiedContextType.values().length];
public StratifiedAlignmentContext(GenomeLoc loc) {
this.loc = loc;
for ( int i = 0; i < StratifiedContextType.values().length; i++) {
reads[i] = new ArrayList<SAMRecord>();
offsets[i] = new ArrayList<Integer>();
}
}
public AlignmentContext getContext(StratifiedContextType context) {
switch ( context ) {
case OVERALL: return getOverallContext();
case FORWARD: return getForwardContext();
case REVERSE: return getReverseContext();
}
return null;
}
private AlignmentContext getOverallContext() {
if ( overall == null )
overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets));
return overall;
}
private AlignmentContext getForwardContext() {
if ( forward == null )
forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets));
return forward;
}
private AlignmentContext getReverseContext() {
if ( reverse == null )
reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets));
return reverse;
int index = context.ordinal();
if ( contexts[index] == null )
contexts[index] = new AlignmentContext(loc, new ReadBackedPileup(loc, reads[index], offsets[index]));
return contexts[index];
}
public void add(SAMRecord read, int offset) {
if ( read.getReadNegativeStrandFlag() ) {
reverseReads.add(read);
reverseOffsets.add(offset);
} else {
forwardReads.add(read);
forwardOffsets.add(offset);
if ( read.getMappingQuality() > 0 ) {
reads[StratifiedContextType.MQ0FREE.ordinal()].add(read);
offsets[StratifiedContextType.MQ0FREE.ordinal()].add(offset);
if ( read.getReadNegativeStrandFlag() ) {
reads[StratifiedContextType.REVERSE.ordinal()].add(read);
offsets[StratifiedContextType.REVERSE.ordinal()].add(offset);
} else {
reads[StratifiedContextType.FORWARD.ordinal()].add(read);
offsets[StratifiedContextType.FORWARD.ordinal()].add(offset);
}
}
allReads.add(read);
allOffsets.add(offset);
reads[StratifiedContextType.COMPLETE.ordinal()].add(read);
offsets[StratifiedContextType.COMPLETE.ordinal()].add(offset);
}
/**

View File

@ -83,7 +83,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup();
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {

View File

@ -22,7 +22,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
// run the EM calculation
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
double PofD = Math.pow(10, overall.getPofD());
double PofNull = Math.pow(10, overall.getPofNull());
@ -93,11 +93,11 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
for ( String sample : GLs.keySet() ) {
// create the call
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL);
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup();
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {

View File

@ -10,8 +10,6 @@ import org.apache.log4j.Logger;
import java.io.*;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
/**
* The model representing how we calculate a genotype given the priors and a pile

View File

@ -55,8 +55,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
initializeAlleleFrequencies(frequencyEstimationPoints);
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
calculatePofFs(ref, frequencyEstimationPoints);
// print out stats if we have a writer
@ -74,7 +74,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
int[] qualCounts = new int[4];
for ( String sample : contexts.keySet() ) {
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL);
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
// calculate the sum of quality scores for each base
ReadBackedPileup pileup = context.getPileup();

View File

@ -40,7 +40,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
return null;
// get the genotype likelihoods
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
// find the index of the best genotype
double[] posteriors = discoveryGL.second.getNormalizedPosteriors();

View File

@ -190,43 +190,40 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param fullContext contextual information around the locus
* @param context contextual information around the locus
*/
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) {
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
return null;
// remove mapping quality zero reads
AlignmentContext MQ0freeContext = filterAlignmentContext(fullContext);
// an optimization to speed things up when there is no coverage or when overly covered
if ( MQ0freeContext.getPileup().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
if ( context.getPileup().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && context.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
return null;
// are there too many deletions in the pileup?
ReadBackedPileup pileup = MQ0freeContext.getPileup();
ReadBackedPileup pileup = context.getPileup();
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
return null;
// stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(MQ0freeContext, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, fullContext.getLocation(), stratifiedContexts, priors);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, context.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(refContext, fullContext, call.first);
annotations = VariantAnnotator.getAllAnnotations(refContext, context, call.first);
else
annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.first);
annotations = VariantAnnotator.getAnnotations(refContext, context, call.first);
((ArbitraryFieldsBacked)call.first).setFields(annotations);
}