Plumbing is now in place to emit indel calls from the UnifiedGenotyper.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2975 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5c35be39ef
commit
c85ed1ce90
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -108,48 +109,84 @@ public class StratifiedAlignmentContext {
|
|||
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedPileup pileup, String assumedSingleSample, String collapseToThisSample) {
|
||||
|
||||
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
|
||||
GenomeLoc loc = pileup.getLocation();
|
||||
|
||||
for (PileupElement p : pileup ) {
|
||||
|
||||
// get the read
|
||||
SAMRecord read = p.getRead();
|
||||
|
||||
// find the sample
|
||||
String sample;
|
||||
if ( collapseToThisSample != null ) {
|
||||
sample = collapseToThisSample;
|
||||
} else {
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if ( readGroup == null ) {
|
||||
if ( assumedSingleSample == null )
|
||||
throw new StingException("Missing read group for read " + read.getReadName());
|
||||
sample = assumedSingleSample;
|
||||
} else {
|
||||
sample = readGroup.getSample();
|
||||
}
|
||||
}
|
||||
|
||||
// create a new context object if this is the first time we're seeing a read for this sample
|
||||
StratifiedAlignmentContext myContext = contexts.get(sample);
|
||||
if ( myContext == null ) {
|
||||
myContext = new StratifiedAlignmentContext(pileup.getLocation());
|
||||
contexts.put(sample, myContext);
|
||||
}
|
||||
|
||||
// add the read to this sample's context
|
||||
// note that bad bases are added to the context (for DoC calculations later)
|
||||
myContext.add(read, p.getOffset());
|
||||
}
|
||||
for (PileupElement p : pileup )
|
||||
addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample);
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
|
||||
*
|
||||
* @param pileup the original pileup
|
||||
*
|
||||
* @return a Map of sample name to StratifiedAlignmentContext
|
||||
*
|
||||
**/
|
||||
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedExtendedEventPileup pileup) {
|
||||
return splitContextBySample(pileup, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
|
||||
*
|
||||
* @param pileup the original pileup
|
||||
* @param assumedSingleSample if not null, any read without a readgroup will be given this sample name
|
||||
* @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group
|
||||
*
|
||||
* @return a Map of sample name to StratifiedAlignmentContext
|
||||
*
|
||||
**/
|
||||
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedExtendedEventPileup pileup, String assumedSingleSample, String collapseToThisSample) {
|
||||
|
||||
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
|
||||
GenomeLoc loc = pileup.getLocation();
|
||||
|
||||
for (PileupElement p : pileup )
|
||||
addToContext(contexts, p, loc, assumedSingleSample, collapseToThisSample);
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
private static void addToContext(HashMap<String, StratifiedAlignmentContext> contexts, PileupElement p, GenomeLoc loc, String assumedSingleSample, String collapseToThisSample) {
|
||||
// get the read
|
||||
SAMRecord read = p.getRead();
|
||||
|
||||
// find the sample
|
||||
String sample;
|
||||
if ( collapseToThisSample != null ) {
|
||||
sample = collapseToThisSample;
|
||||
} else {
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if ( readGroup == null ) {
|
||||
if ( assumedSingleSample == null )
|
||||
throw new StingException("Missing read group for read " + read.getReadName());
|
||||
sample = assumedSingleSample;
|
||||
} else {
|
||||
sample = readGroup.getSample();
|
||||
}
|
||||
}
|
||||
|
||||
// create a new context object if this is the first time we're seeing a read for this sample
|
||||
StratifiedAlignmentContext myContext = contexts.get(sample);
|
||||
if ( myContext == null ) {
|
||||
myContext = new StratifiedAlignmentContext(loc);
|
||||
contexts.put(sample, myContext);
|
||||
}
|
||||
|
||||
// add the read to this sample's context
|
||||
// note that bad bases are added to the context (for DoC calculations later)
|
||||
myContext.add(read, p.getOffset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits the given AlignmentContext into a StratifiedAlignmentContext per read group.
|
||||
*
|
||||
* @param pileup the original pileup
|
||||
* @return a Map of sample name to StratifiedAlignmentContext
|
||||
* @todo - support for collapsing or assuming read groups if they are missing
|
||||
* TODO - support for collapsing or assuming read groups if they are missing
|
||||
*
|
||||
**/
|
||||
public static Map<String,StratifiedAlignmentContext> splitContextByReadGroup(ReadBackedPileup pileup) {
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
* @param stratifiedContexts stratified alignment contexts
|
||||
* @param priors priors to use for GL
|
||||
*
|
||||
* @return calls
|
||||
* @return call
|
||||
*/
|
||||
public abstract VariantCallContext callLocus(RefMetaDataTracker tracker,
|
||||
char ref,
|
||||
|
|
@ -91,6 +91,19 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
Map<String, StratifiedAlignmentContext> stratifiedContexts,
|
||||
DiploidGenotypePriors priors);
|
||||
|
||||
/**
|
||||
* @param tracker rod data
|
||||
* @param ref reference base
|
||||
* @param loc GenomeLoc
|
||||
* @param stratifiedContexts stratified alignment contexts
|
||||
*
|
||||
* @return call
|
||||
*/
|
||||
public abstract VariantCallContext callExtendedLocus(RefMetaDataTracker tracker,
|
||||
char ref,
|
||||
GenomeLoc loc,
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts);
|
||||
|
||||
/**
|
||||
* @param tracker rod data
|
||||
*
|
||||
|
|
|
|||
|
|
@ -39,6 +39,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
|
||||
protected JointEstimateGenotypeCalculationModel() {}
|
||||
|
||||
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
|
||||
int numSamples = getNSamples(contexts);
|
||||
int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero)
|
||||
|
|
|
|||
|
|
@ -11,13 +11,25 @@ import java.util.*;
|
|||
|
||||
public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
|
||||
|
||||
// the previous normal event context
|
||||
private Map<String, StratifiedAlignmentContext> cachedContext;
|
||||
|
||||
protected SimpleIndelCalculationModel() {}
|
||||
|
||||
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
|
||||
|
||||
cachedContext = contexts;
|
||||
return null;
|
||||
}
|
||||
|
||||
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts) {
|
||||
|
||||
System.out.println("Reached " + loc + " through an extended event");
|
||||
|
||||
// TODO -- implement me
|
||||
|
||||
//VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes);
|
||||
//return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD);
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -81,9 +81,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
/** The number of bases called confidently (according to user threshold), either ref or other */
|
||||
long nBasesCalledConfidently = 0;
|
||||
|
||||
double percentCallableOfAll() { return (100.0 * nBasesCallable) / nBasesVisited; }
|
||||
double percentCalledOfAll() { return (100.0 * nBasesCalledConfidently) / nBasesVisited; }
|
||||
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
|
||||
/** The total number of extended events encountered */
|
||||
long nExtendedEvents = 0;
|
||||
|
||||
double percentCallableOfAll() { return (100.0 * nBasesCallable) / (nBasesVisited-nExtendedEvents); }
|
||||
double percentCalledOfAll() { return (100.0 * nBasesCalledConfidently) / (nBasesVisited-nExtendedEvents); }
|
||||
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / (nBasesVisited-nExtendedEvents); }
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -186,7 +189,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
}
|
||||
|
||||
public UGStatistics reduce(VariantCallContext value, UGStatistics sum) {
|
||||
// We get a point for reaching reduce :-)
|
||||
// we get a point for reaching reduce
|
||||
sum.nBasesVisited++;
|
||||
|
||||
// can't call the locus because of no coverage
|
||||
|
|
@ -196,7 +199,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
// A call was attempted -- the base was potentially callable
|
||||
sum.nBasesCallable++;
|
||||
|
||||
// if the base was confidently called something, print it out
|
||||
// if the base was confidently called something, print it out
|
||||
sum.nBasesCalledConfidently += value.confidentlyCalled ? 1 : 0;
|
||||
|
||||
// can't make a confident variant call here
|
||||
|
|
|
|||
|
|
@ -167,43 +167,70 @@ public class UnifiedGenotyperEngine {
|
|||
if ( rawContext.hasExceededMaxPileup() )
|
||||
return null;
|
||||
|
||||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
||||
VariantCallContext call;
|
||||
|
||||
// filter the context based on min base and mapping qualities
|
||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
if ( rawContext.hasExtendedEventPileup() ) {
|
||||
|
||||
// filter the context based on mapping quality and mismatch rate
|
||||
pileup = filterPileup(pileup, refContext);
|
||||
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
|
||||
|
||||
// don't call when there is no coverage
|
||||
if ( pileup.size() == 0 )
|
||||
return null;
|
||||
// filter the context based on min mapping quality
|
||||
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
// are there too many deletions in the pileup?
|
||||
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
|
||||
return null;
|
||||
// filter the context based on bad mates and mismatch rate
|
||||
pileup = filterPileup(pileup, refContext);
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
if ( stratifiedContexts == null )
|
||||
return null;
|
||||
// don't call when there is no coverage
|
||||
if ( pileup.size() == 0 )
|
||||
return null;
|
||||
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
|
||||
VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, null);
|
||||
if ( stratifiedContexts == null )
|
||||
return null;
|
||||
|
||||
// annotate the call, if possible
|
||||
if ( call != null && call.vc != null ) {
|
||||
// first off, we want to use the *unfiltered* context for the annotations
|
||||
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
|
||||
call = gcm.get().callExtendedLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts);
|
||||
|
||||
Map<String, String> annotations;
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
else
|
||||
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
((MutableVariantContext)call.vc).putAttributes(annotations);
|
||||
} else {
|
||||
|
||||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
||||
|
||||
// filter the context based on min base and mapping qualities
|
||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
// filter the context based on bad mates and mismatch rate
|
||||
pileup = filterPileup(pileup, refContext);
|
||||
|
||||
// don't call when there is no coverage
|
||||
if ( pileup.size() == 0 )
|
||||
return null;
|
||||
|
||||
// are there too many deletions in the pileup?
|
||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.INDELS &&
|
||||
isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
if ( stratifiedContexts == null )
|
||||
return null;
|
||||
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
|
||||
call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
|
||||
|
||||
// annotate the call, if possible
|
||||
if ( call != null && call.vc != null ) {
|
||||
// first off, we want to use the *unfiltered* context for the annotations
|
||||
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
|
||||
|
||||
Map<String, String> annotations;
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
else
|
||||
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
((MutableVariantContext)call.vc).putAttributes(annotations);
|
||||
}
|
||||
}
|
||||
|
||||
return call;
|
||||
|
|
@ -221,6 +248,18 @@ public class UnifiedGenotyperEngine {
|
|||
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
|
||||
}
|
||||
|
||||
// filter based on maximum mismatches and bad mates
|
||||
private ReadBackedExtendedEventPileup filterPileup(ReadBackedExtendedEventPileup pileup, ReferenceContext refContext) {
|
||||
|
||||
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
|
||||
for ( ExtendedEventPileupElement p : pileup ) {
|
||||
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
|
||||
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
return new ReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup);
|
||||
}
|
||||
|
||||
private static boolean isValidDeletionFraction(double d) {
|
||||
return ( d >= 0.0 && d <= 1.0 );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* In the "standard" locus traversal mode,
|
||||
|
|
@ -33,7 +32,7 @@ import java.util.Collections;
|
|||
* Time: 2:57:55 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ExtendedEventPileupElement {
|
||||
public class ExtendedEventPileupElement extends PileupElement {
|
||||
public enum Type {
|
||||
NOEVENT, DELETION, INSERTION
|
||||
};
|
||||
|
|
@ -53,8 +52,7 @@ public class ExtendedEventPileupElement {
|
|||
* @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent)
|
||||
*/
|
||||
public ExtendedEventPileupElement( SAMRecord read, int offset, int length, byte[] eventBases ) {
|
||||
this.read = read;
|
||||
this.offset = offset;
|
||||
super(read, offset);
|
||||
this.eventLength = length;
|
||||
if ( length <= 0 ) type = Type.NOEVENT;
|
||||
else {
|
||||
|
|
@ -89,12 +87,6 @@ public class ExtendedEventPileupElement {
|
|||
return isDeletion() || isInsertion();
|
||||
}
|
||||
|
||||
public SAMRecord getRead() { return read; }
|
||||
|
||||
public int getOffset() { return offset; }
|
||||
|
||||
public int getMappingQual() { return read.getMappingQuality(); }
|
||||
|
||||
public Type getType() { return type; }
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ public class PileupElement {
|
|||
public static final byte DELETION_BASE = 'D';
|
||||
public static final byte DELETION_QUAL = 0;
|
||||
|
||||
private SAMRecord read;
|
||||
private int offset;
|
||||
protected SAMRecord read;
|
||||
protected int offset;
|
||||
|
||||
public PileupElement( SAMRecord read, int offset ) {
|
||||
this.read = read;
|
||||
|
|
|
|||
Loading…
Reference in New Issue