Generalized the StratifiedAlignmentContext code so that it's easy to add new ways to stratify. Then added an MQ0-free stratification so we don't need to be carrying around 2 different alignment contexts (full vs. mq0-free) anymore.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2314 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-10 19:50:06 +00:00
parent 0c396f04a2
commit 31b1d60d28
7 changed files with 47 additions and 66 deletions

View File

@ -42,63 +42,49 @@ import java.util.Map;
*/ */
public class StratifiedAlignmentContext { public class StratifiedAlignmentContext {
public enum StratifiedContextType { OVERALL, FORWARD, REVERSE } // Definitions:
// COMPLETE = full alignment context
// MQ0FREE = full context without MQ0 reads
// FORWARD = reads on forward strand (*no* MQ0 reads)
// REVERSE = reads on forward strand (*no* MQ0 reads)
//
public enum StratifiedContextType { COMPLETE, MQ0FREE, FORWARD, REVERSE }
private AlignmentContext overall = null;
private AlignmentContext forward = null;
private AlignmentContext reverse = null;
private GenomeLoc loc; private GenomeLoc loc;
private AlignmentContext[] contexts = new AlignmentContext[StratifiedContextType.values().length];
private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>(); private ArrayList<SAMRecord>[] reads = new ArrayList[StratifiedContextType.values().length];
private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>(); private ArrayList<Integer>[] offsets = new ArrayList[StratifiedContextType.values().length];
private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
public StratifiedAlignmentContext(GenomeLoc loc) { public StratifiedAlignmentContext(GenomeLoc loc) {
this.loc = loc; this.loc = loc;
for ( int i = 0; i < StratifiedContextType.values().length; i++) {
reads[i] = new ArrayList<SAMRecord>();
offsets[i] = new ArrayList<Integer>();
}
} }
public AlignmentContext getContext(StratifiedContextType context) { public AlignmentContext getContext(StratifiedContextType context) {
switch ( context ) { int index = context.ordinal();
case OVERALL: return getOverallContext(); if ( contexts[index] == null )
case FORWARD: return getForwardContext(); contexts[index] = new AlignmentContext(loc, new ReadBackedPileup(loc, reads[index], offsets[index]));
case REVERSE: return getReverseContext(); return contexts[index];
}
return null;
}
private AlignmentContext getOverallContext() {
if ( overall == null )
overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets));
return overall;
}
private AlignmentContext getForwardContext() {
if ( forward == null )
forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets));
return forward;
}
private AlignmentContext getReverseContext() {
if ( reverse == null )
reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets));
return reverse;
} }
public void add(SAMRecord read, int offset) { public void add(SAMRecord read, int offset) {
if ( read.getReadNegativeStrandFlag() ) { if ( read.getMappingQuality() > 0 ) {
reverseReads.add(read); reads[StratifiedContextType.MQ0FREE.ordinal()].add(read);
reverseOffsets.add(offset); offsets[StratifiedContextType.MQ0FREE.ordinal()].add(offset);
} else { if ( read.getReadNegativeStrandFlag() ) {
forwardReads.add(read); reads[StratifiedContextType.REVERSE.ordinal()].add(read);
forwardOffsets.add(offset); offsets[StratifiedContextType.REVERSE.ordinal()].add(offset);
} else {
reads[StratifiedContextType.FORWARD.ordinal()].add(read);
offsets[StratifiedContextType.FORWARD.ordinal()].add(offset);
}
} }
allReads.add(read); reads[StratifiedContextType.COMPLETE.ordinal()].add(read);
allOffsets.add(offset); offsets[StratifiedContextType.COMPLETE.ordinal()].add(offset);
} }
/** /**

View File

@ -83,7 +83,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc); GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
if ( call instanceof ReadBacked ) { if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup(); ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
((ReadBacked)call).setPileup(pileup); ((ReadBacked)call).setPileup(pileup);
} }
if ( call instanceof SampleBacked ) { if ( call instanceof SampleBacked ) {

View File

@ -22,7 +22,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) { public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
// run the EM calculation // run the EM calculation
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL); EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
double PofD = Math.pow(10, overall.getPofD()); double PofD = Math.pow(10, overall.getPofD());
double PofNull = Math.pow(10, overall.getPofNull()); double PofNull = Math.pow(10, overall.getPofNull());
@ -93,11 +93,11 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
for ( String sample : GLs.keySet() ) { for ( String sample : GLs.keySet() ) {
// create the call // create the call
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL); AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) { if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup(); ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
((ReadBacked)call).setPileup(pileup); ((ReadBacked)call).setPileup(pileup);
} }
if ( call instanceof SampleBacked ) { if ( call instanceof SampleBacked ) {

View File

@ -10,8 +10,6 @@ import org.apache.log4j.Logger;
import java.io.*; import java.io.*;
import java.util.*; import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
/** /**
* The model representing how we calculate a genotype given the priors and a pile * The model representing how we calculate a genotype given the priors and a pile

View File

@ -55,8 +55,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
initializeAlleleFrequencies(frequencyEstimationPoints); initializeAlleleFrequencies(frequencyEstimationPoints);
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL); initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL); calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
calculatePofFs(ref, frequencyEstimationPoints); calculatePofFs(ref, frequencyEstimationPoints);
// print out stats if we have a writer // print out stats if we have a writer
@ -74,7 +74,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
int[] qualCounts = new int[4]; int[] qualCounts = new int[4];
for ( String sample : contexts.keySet() ) { for ( String sample : contexts.keySet() ) {
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL); AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
// calculate the sum of quality scores for each base // calculate the sum of quality scores for each base
ReadBackedPileup pileup = context.getPileup(); ReadBackedPileup pileup = context.getPileup();

View File

@ -40,7 +40,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
return null; return null;
// get the genotype likelihoods // get the genotype likelihoods
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL); Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
// find the index of the best genotype // find the index of the best genotype
double[] posteriors = discoveryGL.second.getNormalizedPosteriors(); double[] posteriors = discoveryGL.second.getNormalizedPosteriors();

View File

@ -190,43 +190,40 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
* *
* @param tracker the meta data tracker * @param tracker the meta data tracker
* @param refContext the reference base * @param refContext the reference base
* @param fullContext contextual information around the locus * @param context contextual information around the locus
*/ */
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) { public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
char ref = Character.toUpperCase(refContext.getBase()); char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) ) if ( !BaseUtils.isRegularBase(ref) )
return null; return null;
// remove mapping quality zero reads
AlignmentContext MQ0freeContext = filterAlignmentContext(fullContext);
// an optimization to speed things up when there is no coverage or when overly covered // an optimization to speed things up when there is no coverage or when overly covered
if ( MQ0freeContext.getPileup().size() == 0 || if ( context.getPileup().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) ) (UAC.MAX_READS_IN_PILEUP > 0 && context.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
return null; return null;
// are there too many deletions in the pileup? // are there too many deletions in the pileup?
ReadBackedPileup pileup = MQ0freeContext.getPileup(); ReadBackedPileup pileup = context.getPileup();
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) && if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION ) (double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
return null; return null;
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode // Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(MQ0freeContext, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null)); Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null ) if ( stratifiedContexts == null )
return null; return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, fullContext.getLocation(), stratifiedContexts, priors); Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, context.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible // annotate the call, if possible
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) { if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
Map<String, String> annotations; Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS ) if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(refContext, fullContext, call.first); annotations = VariantAnnotator.getAllAnnotations(refContext, context, call.first);
else else
annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.first); annotations = VariantAnnotator.getAnnotations(refContext, context, call.first);
((ArbitraryFieldsBacked)call.first).setFields(annotations); ((ArbitraryFieldsBacked)call.first).setFields(annotations);
} }