Generalized the StratifiedAlignmentContext code so that it's easy to add new ways to stratify. Then added an MQ0-free stratification so we don't need to be carrying around 2 different alignment contexts (full vs. mq0-free) anymore.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2314 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0c396f04a2
commit
31b1d60d28
|
|
@ -42,63 +42,49 @@ import java.util.Map;
|
|||
*/
|
||||
public class StratifiedAlignmentContext {
|
||||
|
||||
public enum StratifiedContextType { OVERALL, FORWARD, REVERSE }
|
||||
// Definitions:
|
||||
// COMPLETE = full alignment context
|
||||
// MQ0FREE = full context without MQ0 reads
|
||||
// FORWARD = reads on forward strand (*no* MQ0 reads)
|
||||
// REVERSE = reads on reverse strand (*no* MQ0 reads)
|
||||
//
|
||||
public enum StratifiedContextType { COMPLETE, MQ0FREE, FORWARD, REVERSE }
|
||||
|
||||
private AlignmentContext overall = null;
|
||||
private AlignmentContext forward = null;
|
||||
private AlignmentContext reverse = null;
|
||||
private GenomeLoc loc;
|
||||
|
||||
private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
|
||||
private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
|
||||
private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
|
||||
|
||||
private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
|
||||
private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
|
||||
private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
|
||||
private AlignmentContext[] contexts = new AlignmentContext[StratifiedContextType.values().length];
|
||||
private ArrayList<SAMRecord>[] reads = new ArrayList[StratifiedContextType.values().length];
|
||||
private ArrayList<Integer>[] offsets = new ArrayList[StratifiedContextType.values().length];
|
||||
|
||||
|
||||
public StratifiedAlignmentContext(GenomeLoc loc) {
|
||||
this.loc = loc;
|
||||
for ( int i = 0; i < StratifiedContextType.values().length; i++) {
|
||||
reads[i] = new ArrayList<SAMRecord>();
|
||||
offsets[i] = new ArrayList<Integer>();
|
||||
}
|
||||
}
|
||||
|
||||
public AlignmentContext getContext(StratifiedContextType context) {
|
||||
switch ( context ) {
|
||||
case OVERALL: return getOverallContext();
|
||||
case FORWARD: return getForwardContext();
|
||||
case REVERSE: return getReverseContext();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private AlignmentContext getOverallContext() {
|
||||
if ( overall == null )
|
||||
overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets));
|
||||
return overall;
|
||||
}
|
||||
|
||||
private AlignmentContext getForwardContext() {
|
||||
if ( forward == null )
|
||||
forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets));
|
||||
return forward;
|
||||
}
|
||||
|
||||
private AlignmentContext getReverseContext() {
|
||||
if ( reverse == null )
|
||||
reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets));
|
||||
return reverse;
|
||||
int index = context.ordinal();
|
||||
if ( contexts[index] == null )
|
||||
contexts[index] = new AlignmentContext(loc, new ReadBackedPileup(loc, reads[index], offsets[index]));
|
||||
return contexts[index];
|
||||
}
|
||||
|
||||
public void add(SAMRecord read, int offset) {
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
reverseReads.add(read);
|
||||
reverseOffsets.add(offset);
|
||||
} else {
|
||||
forwardReads.add(read);
|
||||
forwardOffsets.add(offset);
|
||||
if ( read.getMappingQuality() > 0 ) {
|
||||
reads[StratifiedContextType.MQ0FREE.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.MQ0FREE.ordinal()].add(offset);
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
reads[StratifiedContextType.REVERSE.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.REVERSE.ordinal()].add(offset);
|
||||
} else {
|
||||
reads[StratifiedContextType.FORWARD.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.FORWARD.ordinal()].add(offset);
|
||||
}
|
||||
}
|
||||
allReads.add(read);
|
||||
allOffsets.add(offset);
|
||||
reads[StratifiedContextType.COMPLETE.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.COMPLETE.ordinal()].add(offset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup();
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
|
||||
|
||||
// run the EM calculation
|
||||
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
|
||||
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
|
||||
double PofD = Math.pow(10, overall.getPofD());
|
||||
double PofNull = Math.pow(10, overall.getPofNull());
|
||||
|
|
@ -93,11 +93,11 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
for ( String sample : GLs.keySet() ) {
|
||||
|
||||
// create the call
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL);
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL).getPileup();
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
|
|||
|
|
@ -10,8 +10,6 @@ import org.apache.log4j.Logger;
|
|||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
/**
|
||||
* The model representing how we calculate a genotype given the priors and a pile
|
||||
|
|
|
|||
|
|
@ -55,8 +55,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
|
||||
initializeAlleleFrequencies(frequencyEstimationPoints);
|
||||
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
|
||||
// print out stats if we have a writer
|
||||
|
|
@ -74,7 +74,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
int[] qualCounts = new int[4];
|
||||
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.OVERALL);
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
|
||||
// calculate the sum of quality scores for each base
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
return null;
|
||||
|
||||
// get the genotype likelihoods
|
||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.OVERALL);
|
||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
|
||||
// find the index of the best genotype
|
||||
double[] posteriors = discoveryGL.second.getNormalizedPosteriors();
|
||||
|
|
|
|||
|
|
@ -190,43 +190,40 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param fullContext contextual information around the locus
|
||||
* @param context contextual information around the locus
|
||||
*/
|
||||
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) {
|
||||
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||
char ref = Character.toUpperCase(refContext.getBase());
|
||||
if ( !BaseUtils.isRegularBase(ref) )
|
||||
return null;
|
||||
|
||||
// remove mapping quality zero reads
|
||||
AlignmentContext MQ0freeContext = filterAlignmentContext(fullContext);
|
||||
|
||||
// an optimization to speed things up when there is no coverage or when overly covered
|
||||
if ( MQ0freeContext.getPileup().size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
if ( context.getPileup().size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && context.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
return null;
|
||||
|
||||
// are there too many deletions in the pileup?
|
||||
ReadBackedPileup pileup = MQ0freeContext.getPileup();
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(MQ0freeContext, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
if ( stratifiedContexts == null )
|
||||
return null;
|
||||
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, fullContext.getLocation(), stratifiedContexts, priors);
|
||||
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, context.getLocation(), stratifiedContexts, priors);
|
||||
|
||||
// annotate the call, if possible
|
||||
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
|
||||
Map<String, String> annotations;
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
annotations = VariantAnnotator.getAllAnnotations(refContext, fullContext, call.first);
|
||||
annotations = VariantAnnotator.getAllAnnotations(refContext, context, call.first);
|
||||
else
|
||||
annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.first);
|
||||
annotations = VariantAnnotator.getAnnotations(refContext, context, call.first);
|
||||
((ArbitraryFieldsBacked)call.first).setFields(annotations);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue