Plumbing is now in place to emit indel calls from the UnifiedGenotyper.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2975 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-10 04:30:12 +00:00
parent 5c35be39ef
commit c85ed1ce90
8 changed files with 180 additions and 80 deletions

View File

@@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import java.util.ArrayList;
import java.util.HashMap;
@@ -108,48 +109,84 @@ public class StratifiedAlignmentContext {
/**
 * Splits the given pileup into a StratifiedAlignmentContext per sample.
 *
 * NOTE(review): this span appears to be a rendered diff in which BOTH the
 * pre-refactor inline loop body and the post-refactor loop delegating to
 * addToContext() are present (the +/- markers were stripped), so as written
 * the pileup is traversed twice and every read is added to its sample
 * context twice. Presumably only the second loop belongs in the committed
 * source -- confirm against the repository.
 */
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedPileup pileup, String assumedSingleSample, String collapseToThisSample) {
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
GenomeLoc loc = pileup.getLocation();
// (pre-refactor per-element logic; duplicated by addToContext below)
for (PileupElement p : pileup ) {
// get the read
SAMRecord read = p.getRead();
// find the sample
String sample;
if ( collapseToThisSample != null ) {
sample = collapseToThisSample;
} else {
SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup == null ) {
if ( assumedSingleSample == null )
throw new StingException("Missing read group for read " + read.getReadName());
sample = assumedSingleSample;
} else {
sample = readGroup.getSample();
}
}
// create a new context object if this is the first time we're seeing a read for this sample
StratifiedAlignmentContext myContext = contexts.get(sample);
if ( myContext == null ) {
myContext = new StratifiedAlignmentContext(pileup.getLocation());
contexts.put(sample, myContext);
}
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
myContext.add(read, p.getOffset());
}
// (post-refactor version: per-element logic factored out into addToContext)
for (PileupElement p : pileup )
addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample);
return contexts;
}
/**
 * Splits the given extended-event (indel) pileup into a
 * StratifiedAlignmentContext per sample.
 *
 * Convenience overload: no assumed single sample and no sample collapsing.
 *
 * @param pileup the original extended-event pileup
 *
 * @return a Map of sample name to StratifiedAlignmentContext
 *
 **/
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedExtendedEventPileup pileup) {
return splitContextBySample(pileup, null, null);
}
/**
 * Splits the given extended-event (indel) pileup into a
 * StratifiedAlignmentContext per sample.
 *
 * @param pileup the original extended-event pileup
 * @param assumedSingleSample if not null, any read without a readgroup will be given this sample name
 * @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group
 *
 * @return a Map of sample name to StratifiedAlignmentContext
 *
 **/
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedExtendedEventPileup pileup, String assumedSingleSample, String collapseToThisSample) {
    GenomeLoc location = pileup.getLocation();
    HashMap<String, StratifiedAlignmentContext> bySample = new HashMap<String, StratifiedAlignmentContext>();
    for ( PileupElement element : pileup ) {
        addToContext(bySample, element, location, assumedSingleSample, collapseToThisSample);
    }
    return bySample;
}
/**
 * Assigns a single pileup element to the context of its sample, creating the
 * per-sample context on first use.
 *
 * @param contexts             map from sample name to its accumulating context (mutated)
 * @param p                    the pileup element to add
 * @param loc                  the locus used when a new context must be created
 * @param assumedSingleSample  if not null, reads without a read group get this sample name
 * @param collapseToThisSample if not null, every read is assigned this sample name
 *
 * @throws StingException if a read has no read group and no assumed sample was supplied
 */
private static void addToContext(HashMap<String, StratifiedAlignmentContext> contexts, PileupElement p, GenomeLoc loc, String assumedSingleSample, String collapseToThisSample) {
    SAMRecord read = p.getRead();

    // resolve the sample name for this read
    String sample;
    if ( collapseToThisSample != null ) {
        // caller asked to collapse everything into one synthetic sample
        sample = collapseToThisSample;
    } else {
        SAMReadGroupRecord readGroup = read.getReadGroup();
        if ( readGroup != null ) {
            sample = readGroup.getSample();
        } else if ( assumedSingleSample != null ) {
            sample = assumedSingleSample;
        } else {
            throw new StingException("Missing read group for read " + read.getReadName());
        }
    }

    // lazily create the per-sample context on first sighting of this sample
    StratifiedAlignmentContext sampleContext = contexts.get(sample);
    if ( sampleContext == null ) {
        sampleContext = new StratifiedAlignmentContext(loc);
        contexts.put(sample, sampleContext);
    }

    // bad bases are deliberately retained (used later for DoC calculations)
    sampleContext.add(read, p.getOffset());
}
/**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per read group.
*
* @param pileup the original pileup
* @return a Map of sample name to StratifiedAlignmentContext
* @todo - support for collapsing or assuming read groups if they are missing
* TODO - support for collapsing or assuming read groups if they are missing
*
**/
public static Map<String,StratifiedAlignmentContext> splitContextByReadGroup(ReadBackedPileup pileup) {

View File

@@ -83,7 +83,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
* @param stratifiedContexts stratified alignment contexts
* @param priors priors to use for GL
*
* @return calls
* @return call
*/
public abstract VariantCallContext callLocus(RefMetaDataTracker tracker,
char ref,
@@ -91,6 +91,19 @@ public abstract class GenotypeCalculationModel implements Cloneable {
Map<String, StratifiedAlignmentContext> stratifiedContexts,
DiploidGenotypePriors priors);
/**
 * Makes a call at an extended-event (indel) locus; invoked instead of
 * callLocus when the alignment context holds an extended-event pileup.
 *
 * @param tracker            rod data
 * @param ref                reference base
 * @param loc                GenomeLoc
 * @param stratifiedContexts stratified alignment contexts
 *
 * @return call
 */
public abstract VariantCallContext callExtendedLocus(RefMetaDataTracker tracker,
char ref,
GenomeLoc loc,
Map<String, StratifiedAlignmentContext> stratifiedContexts);
/**
* @param tracker rod data
*

View File

@@ -39,6 +39,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
protected JointEstimateGenotypeCalculationModel() {}
// Joint-estimation models do not handle extended events (indels) yet;
// returning null signals "no call" at this locus.
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
return null;
}
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
int numSamples = getNSamples(contexts);
int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero)

View File

@@ -11,13 +11,25 @@ import java.util.*;
/**
 * Placeholder genotype-calculation model for simple indel calls.
 * callLocus() only caches the point-event context for later use; the actual
 * indel calling in callExtendedLocus() is not implemented yet (returns null).
 */
public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
// the previous normal (point-event) context, cached by the last callLocus invocation
private Map<String, StratifiedAlignmentContext> cachedContext;
protected SimpleIndelCalculationModel() {}
// Point loci: remember the context and emit no call.
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
cachedContext = contexts;
return null;
}
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts) {
// NOTE(review): debugging print -- should be removed or routed through a logger before release
System.out.println("Reached " + loc + " through an extended event");
// TODO -- implement me
//VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes);
//return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD);
return null;
}
}

View File

@@ -81,9 +81,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
/** The number of bases called confidently (according to user threshold), either ref or other */
long nBasesCalledConfidently = 0;
double percentCallableOfAll() { return (100.0 * nBasesCallable) / nBasesVisited; }
double percentCalledOfAll() { return (100.0 * nBasesCalledConfidently) / nBasesVisited; }
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
/** The total number of extended events encountered */
long nExtendedEvents = 0;
double percentCallableOfAll() { return (100.0 * nBasesCallable) / (nBasesVisited-nExtendedEvents); }
double percentCalledOfAll() { return (100.0 * nBasesCalledConfidently) / (nBasesVisited-nExtendedEvents); }
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / (nBasesVisited-nExtendedEvents); }
}
/**
@@ -186,7 +189,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
public UGStatistics reduce(VariantCallContext value, UGStatistics sum) {
// We get a point for reaching reduce :-)
// we get a point for reaching reduce
sum.nBasesVisited++;
// can't call the locus because of no coverage
@@ -196,7 +199,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// A call was attempted -- the base was potentially callable
sum.nBasesCallable++;
// if the base was confidently called something, print it out
// if the base was confidently called something, print it out
sum.nBasesCalledConfidently += value.confidentlyCalled ? 1 : 0;
// can't make a confident variant call here

View File

@@ -167,43 +167,70 @@ public class UnifiedGenotyperEngine {
if ( rawContext.hasExceededMaxPileup() )
return null;
ReadBackedPileup rawPileup = rawContext.getBasePileup();
VariantCallContext call;
// filter the context based on min base and mapping qualities
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
if ( rawContext.hasExtendedEventPileup() ) {
// filter the context based on mapping quality and mismatch rate
pileup = filterPileup(pileup, refContext);
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
// don't call when there is no coverage
if ( pileup.size() == 0 )
return null;
// filter the context based on min mapping quality
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// are there too many deletions in the pileup?
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
return null;
// filter the context based on bad mates and mismatch rate
pileup = filterPileup(pileup, refContext);
// stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null )
return null;
// don't call when there is no coverage
if ( pileup.size() == 0 )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
// stratify the AlignmentContext and cut by sample
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, null);
if ( stratifiedContexts == null )
return null;
// annotate the call, if possible
if ( call != null && call.vc != null ) {
// first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
call = gcm.get().callExtendedLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts);
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
((MutableVariantContext)call.vc).putAttributes(annotations);
} else {
ReadBackedPileup rawPileup = rawContext.getBasePileup();
// filter the context based on min base and mapping qualities
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
// filter the context based on bad mates and mismatch rate
pileup = filterPileup(pileup, refContext);
// don't call when there is no coverage
if ( pileup.size() == 0 )
return null;
// are there too many deletions in the pileup?
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.INDELS &&
isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
return null;
// stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible
if ( call != null && call.vc != null ) {
// first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
((MutableVariantContext)call.vc).putAttributes(annotations);
}
}
return call;
@@ -221,6 +248,18 @@ public class UnifiedGenotyperEngine {
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
}
// filter based on maximum mismatches and bad mates
/**
 * Filters an extended-event pileup, keeping only elements whose reads have an
 * acceptable mate and at most UAC.MAX_MISMATCHES mismatches in the reference window.
 *
 * A mate is acceptable when badly-mated reads are allowed, the read is unpaired,
 * the mate is unmapped, or the mate maps to the same reference sequence.
 *
 * @param pileup     the extended-event pileup to filter
 * @param refContext reference context used for the mismatch-window check
 *
 * @return a new pileup at the same location containing only the surviving elements
 */
private ReadBackedExtendedEventPileup filterPileup(ReadBackedExtendedEventPileup pileup, ReferenceContext refContext) {
    ArrayList<ExtendedEventPileupElement> kept = new ArrayList<ExtendedEventPileupElement>();
    for ( ExtendedEventPileupElement element : pileup ) {
        SAMRecord read = element.getRead();
        boolean mateAcceptable = UAC.USE_BADLY_MATED_READS
                || !read.getReadPairedFlag()
                || read.getMateUnmappedFlag()
                || read.getMateReferenceIndex() == read.getReferenceIndex();
        if ( mateAcceptable && AlignmentUtils.mismatchesInRefWindow(element, refContext, true) <= UAC.MAX_MISMATCHES ) {
            kept.add(element);
        }
    }
    return new ReadBackedExtendedEventPileup(pileup.getLocation(), kept);
}
/**
 * @param d candidate maximum-deletion-fraction value
 * @return true iff d lies in the closed interval [0.0, 1.0]
 *         (false for NaN, matching the original comparison semantics)
 */
private static boolean isValidDeletionFraction(double d) {
    return 0.0 <= d && d <= 1.0;
}

View File

@@ -5,7 +5,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import net.sf.samtools.SAMRecord;
import java.util.Arrays;
import java.util.Collections;
/**
* In the "standard" locus traversal mode,
@@ -33,7 +32,7 @@ import java.util.Collections;
* Time: 2:57:55 PM
* To change this template use File | Settings | File Templates.
*/
public class ExtendedEventPileupElement {
public class ExtendedEventPileupElement extends PileupElement {
public enum Type {
NOEVENT, DELETION, INSERTION
};
@@ -53,8 +52,7 @@ public class ExtendedEventPileupElement {
* @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent)
*/
public ExtendedEventPileupElement( SAMRecord read, int offset, int length, byte[] eventBases ) {
this.read = read;
this.offset = offset;
super(read, offset);
this.eventLength = length;
if ( length <= 0 ) type = Type.NOEVENT;
else {
@@ -89,12 +87,6 @@ public class ExtendedEventPileupElement {
return isDeletion() || isInsertion();
}
public SAMRecord getRead() { return read; }
public int getOffset() { return offset; }
public int getMappingQual() { return read.getMappingQuality(); }
public Type getType() { return type; }
public GenomeLoc getLocation() {

View File

@@ -19,8 +19,8 @@ public class PileupElement {
public static final byte DELETION_BASE = 'D';
public static final byte DELETION_QUAL = 0;
private SAMRecord read;
private int offset;
protected SAMRecord read;
protected int offset;
public PileupElement( SAMRecord read, int offset ) {
this.read = read;