First preliminary big refactoring of the UG annotation engine. Goals: a) remove the gigantic hack that cached per-read haplotype likelihoods in a static array so that annotations could go back and retrieve them, b) unify the interface for annotations between HaplotypeCaller and UnifiedGenotyper, and c) as a consequence, remove and clean up duplicated code. As a bonus, annotations now have more relevant info to help them compute values.
The major idea is that per-read haplotype likelihoods are now stored in a single unified object of class PerReadAlleleLikelihoodMap. The class implementation in theory hides internal storage details from the outside (the interface may still need some cleanup), and this object (or rather, a Map from sample -> PerReadAlleleLikelihoodMap) is produced by UGCalcLikelihoods. The genotype calculation can also potentially use this info if needed. All InfoFieldAnnotations now get an extra argument with this map. Currently, this map is only produced for indels in UG, or for all variants within HaplotypeCaller. If this map is absent (SNPs in UG), the old Pileup interface is used, but it is avoided whenever possible. FORMAT annotations are not yet changed but will be the focus of a second step. The major benefit will be that annotations will be able to very easily discard non-informative reads for certain events. HaplotypeCaller also uses this new class, and no longer hard-codes the mapping of allele -> list(reads) but instead uses the same objects and interfaces as the rest of the modules. Code still needs further testing/cleaning/reviewing/debugging.
This commit is contained in:
parent
04be0c92bf
commit
d26183e0ec
|
|
@ -41,15 +41,6 @@ import java.util.*;
|
||||||
|
|
||||||
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||||
|
|
||||||
//protected Set<String> laneIDs;
|
|
||||||
public enum Model {
|
|
||||||
SNP,
|
|
||||||
INDEL,
|
|
||||||
POOLSNP,
|
|
||||||
POOLINDEL,
|
|
||||||
BOTH
|
|
||||||
}
|
|
||||||
|
|
||||||
final protected UnifiedArgumentCollection UAC;
|
final protected UnifiedArgumentCollection UAC;
|
||||||
|
|
||||||
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||||
|
|
@ -203,7 +194,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
||||||
final AlignmentContextUtils.ReadOrientation contextType,
|
final AlignmentContextUtils.ReadOrientation contextType,
|
||||||
final List<Allele> allAllelesToUse,
|
final List<Allele> allAllelesToUse,
|
||||||
final boolean useBAQedPileup,
|
final boolean useBAQedPileup,
|
||||||
final GenomeLocParser locParser) {
|
final GenomeLocParser locParser,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
HashMap<String, ErrorModel> perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts);
|
HashMap<String, ErrorModel> perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts);
|
||||||
if (perLaneErrorModels == null && UAC.referenceSampleName != null)
|
if (perLaneErrorModels == null && UAC.referenceSampleName != null)
|
||||||
|
|
@ -215,8 +207,11 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
||||||
newContext.put(DUMMY_SAMPLE_NAME,mergedContext);
|
newContext.put(DUMMY_SAMPLE_NAME,mergedContext);
|
||||||
contexts = newContext;
|
contexts = newContext;
|
||||||
}
|
}
|
||||||
|
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||||
// get initial alleles to genotype
|
// starting a new site: clear allele list
|
||||||
|
perReadAlleleLikelihoodMap.clear(); // clean mapping sample-> per read, per allele likelihoods
|
||||||
|
}
|
||||||
|
// get initial alleles to genotype
|
||||||
final List<Allele> allAlleles = new ArrayList<Allele>();
|
final List<Allele> allAlleles = new ArrayList<Allele>();
|
||||||
if (allAllelesToUse == null || allAllelesToUse.isEmpty())
|
if (allAllelesToUse == null || allAllelesToUse.isEmpty())
|
||||||
allAlleles.addAll(getInitialAllelesToUse(tracker, ref,contexts,contextType,locParser, allAllelesToUse));
|
allAlleles.addAll(getInitialAllelesToUse(tracker, ref,contexts,contextType,locParser, allAllelesToUse));
|
||||||
|
|
@ -234,9 +229,13 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||||
|
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
|
||||||
|
// no likelihoods have been computed for this sample at this site
|
||||||
|
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
|
||||||
|
}
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
|
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO, perReadAlleleLikelihoodMap.get(sample.getKey()));
|
||||||
// actually compute likelihoods
|
// actually compute likelihoods
|
||||||
final int nGoodBases = GL.add(pileup, UAC);
|
final int nGoodBases = GL.add(pileup, UAC);
|
||||||
if ( nGoodBases > 0 )
|
if ( nGoodBases > 0 )
|
||||||
|
|
@ -333,7 +332,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
||||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||||
final boolean useBQAedPileup,
|
final boolean useBQAedPileup,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final boolean ignoreLaneInformation);
|
final boolean ignoreLaneInformation,
|
||||||
|
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap);
|
||||||
|
|
||||||
protected abstract List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
protected abstract List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
||||||
double[][] readHaplotypeLikelihoods;
|
double[][] readHaplotypeLikelihoods;
|
||||||
|
|
||||||
final byte refBase;
|
final byte refBase;
|
||||||
|
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap;
|
||||||
|
|
||||||
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
|
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||||
final double[] logLikelihoods,
|
final double[] logLikelihoods,
|
||||||
|
|
@ -34,7 +35,8 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
||||||
final boolean ignoreLaneInformation,
|
final boolean ignoreLaneInformation,
|
||||||
final PairHMMIndelErrorModel pairModel,
|
final PairHMMIndelErrorModel pairModel,
|
||||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||||
final ReferenceContext referenceContext) {
|
final ReferenceContext referenceContext,
|
||||||
|
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
|
||||||
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
|
||||||
this.pairModel = pairModel;
|
this.pairModel = pairModel;
|
||||||
this.haplotypeMap = haplotypeMap;
|
this.haplotypeMap = haplotypeMap;
|
||||||
|
|
@ -42,6 +44,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
||||||
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
|
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
|
||||||
// todo - not needed if indel alleles have base at current position
|
// todo - not needed if indel alleles have base at current position
|
||||||
this.refBase = referenceContext.getBase();
|
this.refBase = referenceContext.getBase();
|
||||||
|
this.perReadAlleleLikelihoodMap = perReadAlleleLikelihoodMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
@ -142,10 +145,9 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype
|
||||||
List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
|
List<Integer> numSeenBases = new ArrayList<Integer>(this.alleles.size());
|
||||||
|
|
||||||
if (!hasReferenceSampleData) {
|
if (!hasReferenceSampleData) {
|
||||||
final int numHaplotypes = haplotypeMap.size();
|
|
||||||
|
|
||||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, perReadAlleleLikelihoodMap, readCounts);
|
||||||
n = readHaplotypeLikelihoods.length;
|
n = readHaplotypeLikelihoods.length;
|
||||||
} else {
|
} else {
|
||||||
Allele refAllele = null;
|
Allele refAllele = null;
|
||||||
|
|
|
||||||
|
|
@ -73,8 +73,9 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener
|
||||||
final HashMap<String, ErrorModel> perLaneErrorModels,
|
final HashMap<String, ErrorModel> perLaneErrorModels,
|
||||||
final boolean useBQAedPileup,
|
final boolean useBQAedPileup,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final boolean ignoreLaneInformation){
|
final boolean ignoreLaneInformation,
|
||||||
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
|
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||||
|
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref, perReadAlleleLikelihoodMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,
|
||||||
|
|
@ -90,7 +91,6 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener
|
||||||
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE)
|
||||||
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE);
|
||||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear();
|
|
||||||
haplotypeMap.clear();
|
haplotypeMap.clear();
|
||||||
}
|
}
|
||||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
|
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap);
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
|
|
@ -44,10 +45,6 @@ import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
|
|
@ -417,7 +414,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
|
: genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
|
||||||
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
|
if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
|
||||||
|
|
||||||
final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult );
|
||||||
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
|
||||||
|
|
||||||
// add some custom annotations to the calls
|
// add some custom annotations to the calls
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -323,11 +324,13 @@ public class LikelihoodCalculationEngine {
|
||||||
return bestHaplotypes;
|
return bestHaplotypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Map<String, Map<Allele, List<GATKSAMRecord>>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
||||||
final Map<String, Map<Allele, List<GATKSAMRecord>>> returnMap = new HashMap<String, Map<Allele, List<GATKSAMRecord>>>();
|
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||||
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
|
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
|
||||||
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
||||||
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
|
//final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
|
||||||
|
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||||
|
|
||||||
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
|
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
|
||||||
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
|
for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
|
||||||
final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
|
final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
|
||||||
|
|
@ -335,51 +338,31 @@ public class LikelihoodCalculationEngine {
|
||||||
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
||||||
final double likelihoods[] = new double[call.getFirst().getAlleles().size()];
|
final double likelihoods[] = new double[call.getFirst().getAlleles().size()];
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood
|
|
||||||
double maxLikelihood = Double.NEGATIVE_INFINITY;
|
for( final Allele a : call.getFirst().getAlleles() ) {
|
||||||
for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
|
for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
|
||||||
final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
|
final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
|
||||||
if( likelihood > maxLikelihood ) {
|
likelihoodMap.add(read, a, likelihood);
|
||||||
maxLikelihood = likelihood;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
likelihoods[count++] = maxLikelihood;
|
|
||||||
}
|
|
||||||
final int bestAllele = MathUtils.maxElementIndex(likelihoods);
|
|
||||||
final double bestLikelihood = likelihoods[bestAllele];
|
|
||||||
Allele allele = Allele.NO_CALL;
|
|
||||||
boolean isInformativeRead = false;
|
|
||||||
for( final double likelihood : likelihoods ) {
|
|
||||||
if( bestLikelihood - likelihood > BEST_LIKELIHOOD_THRESHOLD ) {
|
|
||||||
isInformativeRead = true;
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// uninformative reads get the no call Allele
|
|
||||||
if( isInformativeRead ) {
|
|
||||||
allele = call.getFirst().getAlleles().get(bestAllele);
|
|
||||||
}
|
|
||||||
List<GATKSAMRecord> readList = alleleReadMap.get(allele);
|
|
||||||
if( readList == null ) {
|
|
||||||
readList = new ArrayList<GATKSAMRecord>();
|
|
||||||
alleleReadMap.put(allele, readList);
|
|
||||||
}
|
|
||||||
readList.add(read);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// add all filtered reads to the NO_CALL list because they weren't given any likelihoods
|
/* // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
|
||||||
List<GATKSAMRecord> readList = alleleReadMap.get(Allele.NO_CALL);
|
List<GATKSAMRecord> readList = alleleReadMap.get(Allele.NO_CALL);
|
||||||
if( readList == null ) {
|
if( readList == null ) {
|
||||||
readList = new ArrayList<GATKSAMRecord>();
|
readList = new ArrayList<GATKSAMRecord>();
|
||||||
alleleReadMap.put(Allele.NO_CALL, readList);
|
alleleReadMap.put(Allele.NO_CALL, readList);
|
||||||
}
|
}
|
||||||
for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
*/
|
||||||
|
/* for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
|
||||||
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
||||||
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
||||||
readList.add(read);
|
readList.add(read);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
returnMap.put(sample.getKey(), alleleReadMap);
|
*/
|
||||||
|
returnMap.put(sample.getKey(), likelihoodMap);
|
||||||
|
|
||||||
}
|
}
|
||||||
return returnMap;
|
return returnMap;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -201,7 +201,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
// compute mean number of reduced read counts in current kmer span
|
// compute mean number of reduced read counts in current kmer span
|
||||||
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
|
final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1);
|
||||||
// precise rounding can make a difference with low consensus counts
|
// precise rounding can make a difference with low consensus counts
|
||||||
countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
countNumber = MathUtils.arrayMax(counts);
|
||||||
|
// countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !badKmer ) {
|
if( !badKmer ) {
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -51,7 +52,12 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
||||||
|
|
||||||
|
|
||||||
char[] BASES = {'A','C','G','T'};
|
char[] BASES = {'A','C','G','T'};
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -52,7 +53,12 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class BaseCounts extends InfoFieldAnnotation {
|
public class BaseCounts extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
@ -21,66 +23,40 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); }
|
||||||
|
|
||||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
protected void fillQualsFromPileup(final List<Allele> allAlleles, final int refLoc,
|
||||||
for ( final PileupElement p : pileup ) {
|
final ReadBackedPileup pileup,
|
||||||
if( isUsableBase(p) ) {
|
final PerReadAlleleLikelihoodMap alleleLikelihoodMap,
|
||||||
if ( p.getBase() == ref )
|
final List<Double> refQuals, final List<Double> altQuals){
|
||||||
refQuals.add((double)p.getQual());
|
|
||||||
else if ( alts.contains(p.getBase()) )
|
|
||||||
altQuals.add((double)p.getQual());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
|
||||||
// TODO -- implement me; how do we pull out the correct offset from the read?
|
|
||||||
return;
|
|
||||||
|
|
||||||
/*
|
|
||||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
|
||||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
|
||||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
|
||||||
if ( !matchesRef && !matchesAlt )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
|
||||||
|
|
||||||
|
if (alleleLikelihoodMap == null) {
|
||||||
|
// use fast SNP-based version if we don't have per-read allele likelihoods
|
||||||
|
for ( final PileupElement p : pileup ) {
|
||||||
if ( isUsableBase(p) ) {
|
if ( isUsableBase(p) ) {
|
||||||
if ( matchesRef )
|
if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) {
|
||||||
refQuals.add((double)p.getQual());
|
refQuals.add((double)p.getQual());
|
||||||
else
|
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
|
||||||
altQuals.add((double)p.getQual());
|
altQuals.add((double)p.getQual());
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
|
||||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
|
||||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
|
||||||
for (final PileupElement p: pileup) {
|
|
||||||
if (indelLikelihoodMap.containsKey(p)) {
|
|
||||||
// retrieve likelihood information corresponding to this read
|
|
||||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
|
||||||
// by design, first element in LinkedHashMap was ref allele
|
|
||||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
for (Map.Entry<Allele, Double> entry : el.entrySet()) {
|
|
||||||
|
|
||||||
if (entry.getKey().isReference())
|
|
||||||
refLikelihood = entry.getValue();
|
|
||||||
else {
|
|
||||||
double like = entry.getValue();
|
|
||||||
if (like >= altLikelihood)
|
|
||||||
altLikelihood = like;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH)
|
|
||||||
refQuals.add(-10.0*refLikelihood);
|
|
||||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
|
||||||
altQuals.add(-10.0*altLikelihood);
|
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Map.Entry<PileupElement,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
|
if (!isUsableBase(el.getKey()))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||||
|
if (a.isNoCall())
|
||||||
|
continue; // read is non-informative
|
||||||
|
if (a.isReference())
|
||||||
|
refQuals.add(-10.0*(double)el.getValue().get(a));
|
||||||
|
else if (allAlleles.contains(a))
|
||||||
|
altQuals.add(-10.0*(double)el.getValue().get(a));
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -61,7 +62,12 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
||||||
|
|
||||||
private Set<String> founderIds = new HashSet<String>();
|
private Set<String> founderIds = new HashSet<String>();
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||||
if ( ! vc.hasGenotypes() )
|
if ( ! vc.hasGenotypes() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -73,13 +79,6 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
||||||
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
|
||||||
if ( ! vc.hasGenotypes() )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Arrays.asList(keyNames);
|
return Arrays.asList(keyNames);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -24,68 +25,26 @@ public class ClippingRankSumTest extends RankSumTest {
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases")); }
|
||||||
|
|
||||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
|
||||||
return;
|
|
||||||
// This working implementation below needs to be tested for the UG pipeline
|
|
||||||
/*
|
|
||||||
for ( final PileupElement p : pileup ) {
|
|
||||||
if ( isUsableBase(p) ) {
|
|
||||||
if ( p.getBase() == ref ) {
|
|
||||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
|
||||||
} else if ( alts.contains(p.getBase()) ) {
|
|
||||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
protected void fillQualsFromPileup(final List<Allele> allAlleles,
|
||||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
final int refLoc,
|
||||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
final ReadBackedPileup pileup,
|
||||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
final PerReadAlleleLikelihoodMap likelihoodMap, final List<Double> refQuals, final List<Double> altQuals) {
|
||||||
if ( !matchesRef && !matchesAlt )
|
// todo - only support non-pileup case for now, e.g. active-region based version
|
||||||
continue;
|
if (pileup != null)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (Map.Entry<PileupElement,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
|
|
||||||
|
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||||
|
if (a.isNoCall())
|
||||||
|
continue; // read is non-informative
|
||||||
|
if (a.isReference())
|
||||||
|
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey().getRead()));
|
||||||
|
else if (allAlleles.contains(a))
|
||||||
|
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey().getRead()));
|
||||||
|
|
||||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
|
||||||
if ( matchesRef )
|
|
||||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(read));
|
|
||||||
else
|
|
||||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(read));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
}
|
||||||
return;
|
|
||||||
// This working implementation below needs to be tested for the UG pipeline
|
|
||||||
|
|
||||||
/*
|
|
||||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
|
||||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
|
||||||
for (final PileupElement p: pileup) {
|
|
||||||
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
|
|
||||||
// retrieve likelihood information corresponding to this read
|
|
||||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
|
||||||
// by design, first element in LinkedHashMap was ref allele
|
|
||||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
for (Allele a : el.keySet()) {
|
|
||||||
|
|
||||||
if (a.isReference())
|
|
||||||
refLikelihood =el.get(a);
|
|
||||||
else {
|
|
||||||
double like = el.get(a);
|
|
||||||
if (like >= altLikelihood)
|
|
||||||
altLikelihood = like;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH)
|
|
||||||
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
|
||||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
|
||||||
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||||
|
|
@ -38,28 +39,30 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
if ( stratifiedContexts.size() == 0 )
|
final AnnotatorCompatible walker,
|
||||||
return null;
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||||
|
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
|
if (stratifiedContexts != null) {
|
||||||
depth += sample.getValue().getBasePileup().depthOfCoverage();
|
if ( stratifiedContexts.size() == 0 )
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
return null;
|
||||||
map.put(getKeyNames().get(0), String.format("%d", depth));
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
|
||||||
if ( stratifiedContexts.size() == 0 )
|
depth += sample.getValue().getBasePileup().depthOfCoverage();
|
||||||
return null;
|
|
||||||
|
|
||||||
int depth = 0;
|
|
||||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
|
||||||
for ( final List<GATKSAMRecord> alleleBin : alleleBins.values() ) {
|
|
||||||
depth += alleleBin.size();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else if (perReadAlleleLikelihoodMap != null) {
|
||||||
|
if ( perReadAlleleLikelihoodMap.size() == 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
for ( Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perReadAlleleLikelihoodMap.entrySet() )
|
||||||
|
depth += sample.getValue().getLikelihoodReadMap().size();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
map.put(getKeyNames().get(0), String.format("%d", depth));
|
map.put(getKeyNames().get(0), String.format("%d", depth));
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,13 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) {
|
public void annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final AlignmentContext stratifiedContext,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Genotype g,
|
||||||
|
final GenotypeBuilder gb) {
|
||||||
if ( g == null || !g.isCalled() )
|
if ( g == null || !g.isCalled() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -54,21 +55,29 @@ import java.util.*;
|
||||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
private static final String FS = "FS";
|
private static final String FS = "FS";
|
||||||
private static final double MIN_PVALUE = 1E-320;
|
private static final double MIN_PVALUE = 1E-320;
|
||||||
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( !vc.isVariant() )
|
if ( !vc.isVariant() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
int[][] table;
|
int[][] table;
|
||||||
|
|
||||||
if ( vc.isSNP() )
|
if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||||
|
table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
||||||
|
}
|
||||||
|
else if (vc.isSNP() && stratifiedContexts != null) {
|
||||||
table = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
table = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
||||||
else if ( vc.isIndel() || vc.isMixed() ) {
|
|
||||||
table = getIndelContingencyTable(stratifiedContexts);
|
|
||||||
if (table == null)
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
// for non-snp variants, we need per-read likelihoods.
|
||||||
|
// for snps, we can get same result from simple pileup
|
||||||
|
return null;
|
||||||
|
|
||||||
|
if (table == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
|
Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
|
||||||
|
|
@ -80,22 +89,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
|
||||||
if ( !vc.isVariant() )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
final int[][] table = getContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
|
||||||
|
|
||||||
final Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
|
|
||||||
if ( pvalue == null )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
final Map<String, Object> map = new HashMap<String, Object>();
|
|
||||||
map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue)));
|
|
||||||
return map;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Arrays.asList(FS);
|
return Arrays.asList(FS);
|
||||||
}
|
}
|
||||||
|
|
@ -161,7 +154,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
table[0][1] += 1;
|
table[0][1] += 1;
|
||||||
table[1][1] -= 1;
|
table[1][1] -= 1;
|
||||||
|
|
||||||
return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false;
|
return (table[0][0] >= 0 && table[1][1] >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean unrotateTable(int[][] table) {
|
private static boolean unrotateTable(int[][] table) {
|
||||||
|
|
@ -171,7 +164,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
table[0][1] -= 1;
|
table[0][1] -= 1;
|
||||||
table[1][1] += 1;
|
table[1][1] += 1;
|
||||||
|
|
||||||
return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false;
|
return (table[0][1] >= 0 && table[1][0] >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static double computePValue(int[][] table) {
|
private static double computePValue(int[][] table) {
|
||||||
|
|
@ -218,31 +211,29 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
* allele2 # #
|
* allele2 # #
|
||||||
* @return a 2x2 contingency table
|
* @return a 2x2 contingency table
|
||||||
*/
|
*/
|
||||||
private static int[][] getContingencyTable(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, Allele ref, Allele alt) {
|
private static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||||
|
final Allele ref, final Allele alt) {
|
||||||
int[][] table = new int[2][2];
|
int[][] table = new int[2][2];
|
||||||
|
|
||||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
for (Map.Entry<PileupElement,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
||||||
|
final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref);
|
||||||
|
final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt);
|
||||||
|
|
||||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
|
||||||
final boolean matchesAlt = alt.equals(alleleBin.getKey());
|
|
||||||
if ( !matchesRef && !matchesAlt )
|
if ( !matchesRef && !matchesAlt )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
boolean isFW = el.getKey().getRead().getReadNegativeStrandFlag();
|
||||||
boolean isFW = read.getReadNegativeStrandFlag();
|
|
||||||
|
|
||||||
int row = matchesRef ? 0 : 1;
|
int row = matchesRef ? 0 : 1;
|
||||||
int column = isFW ? 0 : 1;
|
int column = isFW ? 0 : 1;
|
||||||
|
|
||||||
table[row][column]++;
|
table[row][column]++;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
||||||
* fw rc
|
* fw rc
|
||||||
|
|
@ -275,69 +266,5 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
|
|
||||||
* fw rc
|
|
||||||
* allele1 # #
|
|
||||||
* allele2 # #
|
|
||||||
* @return a 2x2 contingency table
|
|
||||||
*/
|
|
||||||
private static int[][] getIndelContingencyTable(Map<String, AlignmentContext> stratifiedContexts) {
|
|
||||||
final double INDEL_LIKELIHOOD_THRESH = 0.3;
|
|
||||||
final HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
|
||||||
|
|
||||||
if (indelLikelihoodMap == null)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
int[][] table = new int[2][2];
|
|
||||||
|
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
|
||||||
final AlignmentContext context = sample.getValue();
|
|
||||||
if ( context == null )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
final ReadBackedPileup pileup = context.getBasePileup();
|
|
||||||
for ( final PileupElement p : pileup ) {
|
|
||||||
if ( ! RankSumTest.isUsableBase(p, true) || p.getRead().isReducedRead() ) // ignore reduced reads
|
|
||||||
continue;
|
|
||||||
if ( indelLikelihoodMap.containsKey(p) ) {
|
|
||||||
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.
|
|
||||||
// A pileup element then has a list of pairs of form (Allele, likelihood of this allele).
|
|
||||||
// To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles.
|
|
||||||
// If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pileup element is "ref"
|
|
||||||
// otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt"
|
|
||||||
// retrieve likelihood information corresponding to this read
|
|
||||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
|
||||||
// by design, first element in LinkedHashMap was ref allele
|
|
||||||
boolean isFW = !p.getRead().getReadNegativeStrandFlag();
|
|
||||||
|
|
||||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
for (Map.Entry<Allele,Double> entry : el.entrySet()) {
|
|
||||||
|
|
||||||
if (entry.getKey().isReference())
|
|
||||||
refLikelihood = entry.getValue();
|
|
||||||
else {
|
|
||||||
double like = entry.getValue();
|
|
||||||
if (like >= altLikelihood)
|
|
||||||
altLikelihood = like;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean matchesRef = (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH));
|
|
||||||
boolean matchesAlt = (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH));
|
|
||||||
if ( matchesRef || matchesAlt ) {
|
|
||||||
int row = matchesRef ? 0 : 1;
|
|
||||||
int column = isFW ? 0 : 1;
|
|
||||||
|
|
||||||
table[row][column]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return table;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -25,7 +26,12 @@ import java.util.Map;
|
||||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
double content = computeGCContent(ref);
|
double content = computeGCContent(ref);
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -60,7 +61,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||||
private final static char REGEXP_WILDCARD = '.';
|
private final static char REGEXP_WILDCARD = '.';
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -88,7 +94,9 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
if (vc.isSNP())
|
if (vc.isSNP())
|
||||||
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||||
else if (vc.isIndel() || vc.isMixed()) {
|
else if (vc.isIndel() || vc.isMixed()) {
|
||||||
Double d = scoreIndelsAgainstHaplotypes(thisPileup);
|
if (stratifiedPerReadAlleleLikelihoodMap == null)
|
||||||
|
return null;
|
||||||
|
Double d = scoreIndelsAgainstHaplotypes(stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()));
|
||||||
if (d == null)
|
if (d == null)
|
||||||
return null;
|
return null;
|
||||||
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense
|
||||||
|
|
@ -177,7 +185,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
|
|
||||||
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) {
|
||||||
final GATKSAMRecord read = p.getRead();
|
final GATKSAMRecord read = p.getRead();
|
||||||
int readOffsetFromPileup = p.getOffset();
|
|
||||||
|
|
||||||
final byte[] haplotypeBases = new byte[contextSize];
|
final byte[] haplotypeBases = new byte[contextSize];
|
||||||
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
|
Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD);
|
||||||
|
|
@ -189,7 +196,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
byte[] readQuals = read.getBaseQualities();
|
byte[] readQuals = read.getBaseQualities();
|
||||||
readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string
|
||||||
|
|
||||||
readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus);
|
final int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus);
|
||||||
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
|
||||||
|
|
||||||
for (int i = 0; i < contextSize; i++) {
|
for (int i = 0; i < contextSize; i++) {
|
||||||
|
|
@ -346,31 +353,26 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Double scoreIndelsAgainstHaplotypes(final ReadBackedPileup pileup) {
|
private Double scoreIndelsAgainstHaplotypes(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) {
|
||||||
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
final ArrayList<double[]> haplotypeScores = new ArrayList<double[]>();
|
||||||
|
|
||||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
if (perReadAlleleLikelihoodMap.isEmpty())
|
||||||
|
|
||||||
if (indelLikelihoodMap == null)
|
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
for (final PileupElement p : pileup) {
|
for (Map.Entry<PileupElement,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
if (indelLikelihoodMap.containsKey(p)) {
|
|
||||||
// retrieve likelihood information corresponding to this read
|
|
||||||
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p);
|
|
||||||
|
|
||||||
// Score all the reads in the pileup, even the filtered ones
|
// retrieve likelihood information corresponding to this read
|
||||||
final double[] scores = new double[el.size()];
|
// Score all the reads in the pileup, even the filtered ones
|
||||||
int i = 0;
|
final double[] scores = new double[el.getValue().size()];
|
||||||
for (Map.Entry<Allele, Double> a : el.entrySet()) {
|
int i = 0;
|
||||||
scores[i++] = -a.getValue();
|
for (Map.Entry<Allele, Double> a : el.getValue().entrySet()) {
|
||||||
if (DEBUG) {
|
scores[i++] = -a.getValue();
|
||||||
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
|
if (DEBUG) {
|
||||||
}
|
System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
haplotypeScores.add(scores);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
haplotypeScores.add(scores);
|
||||||
}
|
}
|
||||||
|
|
||||||
// indel likelihoods are strict log-probs, not phred scored
|
// indel likelihoods are strict log-probs, not phred scored
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -29,7 +30,12 @@ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgress
|
||||||
private static final int MIN_GENOTYPE_QUALITY = 10;
|
private static final int MIN_GENOTYPE_QUALITY = 10;
|
||||||
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
final GenotypesContext genotypes = vc.getGenotypes();
|
final GenotypesContext genotypes = vc.getGenotypes();
|
||||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -22,7 +23,12 @@ public class HomopolymerRun extends InfoFieldAnnotation {
|
||||||
|
|
||||||
private boolean ANNOTATE_INDELS = true;
|
private boolean ANNOTATE_INDELS = true;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
if ( !vc.isBiallelic() )
|
if ( !vc.isBiallelic() )
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -33,17 +34,18 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
private Set<String> founderIds;
|
private Set<String> founderIds;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||||
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
|
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
|
||||||
if(founderIds == null)
|
if(founderIds == null && walker != null)
|
||||||
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
||||||
return calculateIC(vc);
|
return calculateIC(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
|
||||||
return calculateIC(vc);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Map<String, Object> calculateIC(final VariantContext vc) {
|
private Map<String, Object> calculateIC(final VariantContext vc) {
|
||||||
final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds);
|
final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds);
|
||||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.IndelUtils;
|
import org.broadinstitute.sting.utils.IndelUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -18,9 +19,14 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
int run;
|
int run;
|
||||||
if (vc.isMixed()) {
|
if (vc.isMixed()) {
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
|
map.put(getKeyNames().get(0), String.format("%s", "MIXED"));
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
@ -21,7 +22,12 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class LowMQ extends InfoFieldAnnotation {
|
public class LowMQ extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -32,7 +33,12 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment
|
||||||
private String fatherId;
|
private String fatherId;
|
||||||
private String childId;
|
private String childId;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( mendelianViolation == null ) {
|
if ( mendelianViolation == null ) {
|
||||||
if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
|
if (checkAndSetSamples(((Walker) walker).getSampleDB())) {
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
|
|
@ -23,60 +25,39 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities")); }
|
||||||
|
|
||||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
protected void fillQualsFromPileup(final List<Allele> allAlleles,
|
||||||
for ( final PileupElement p : pileup ) {
|
final int refLoc,
|
||||||
if ( isUsableBase(p) ) {
|
final ReadBackedPileup pileup,
|
||||||
if ( p.getBase() == ref ) {
|
final PerReadAlleleLikelihoodMap likelihoodMap,
|
||||||
refQuals.add((double)p.getMappingQual());
|
final List<Double> refQuals, final List<Double> altQuals) {
|
||||||
} else if ( alts.contains(p.getBase()) ) {
|
|
||||||
altQuals.add((double)p.getMappingQual());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
if (pileup != null && likelihoodMap == null) {
|
||||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
// no per-read likelihoods available:
|
||||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
for ( final PileupElement p : pileup ) {
|
||||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
if ( isUsableBase(p) ) {
|
||||||
if ( !matchesRef && !matchesAlt )
|
if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) {
|
||||||
continue;
|
refQuals.add((double)p.getMappingQual());
|
||||||
|
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
|
||||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
altQuals.add((double)p.getMappingQual());
|
||||||
if ( matchesRef )
|
|
||||||
refQuals.add((double)read.getMappingQuality());
|
|
||||||
else
|
|
||||||
altQuals.add((double)read.getMappingQuality());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
|
||||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ?
|
|
||||||
HashMap<PileupElement,LinkedHashMap<Allele,Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
|
||||||
for (final PileupElement p: pileup) {
|
|
||||||
if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
|
|
||||||
// retrieve likelihood information corresponding to this read
|
|
||||||
LinkedHashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
|
||||||
// by design, first element in LinkedHashMap was ref allele
|
|
||||||
double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
for (Map.Entry<Allele,Double> a : el.entrySet()) {
|
|
||||||
|
|
||||||
if (a.getKey().isReference())
|
|
||||||
refLikelihood = a.getValue();
|
|
||||||
else {
|
|
||||||
double like = a.getValue();
|
|
||||||
if (like >= altLikelihood)
|
|
||||||
altLikelihood = like;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH)
|
|
||||||
refQuals.add((double)p.getMappingQual());
|
|
||||||
else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH)
|
|
||||||
altQuals.add((double)p.getMappingQual());
|
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (Map.Entry<PileupElement,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
|
if (!isUsableBase(el.getKey()))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||||
|
if (a.isNoCall())
|
||||||
|
continue; // read is non-informative
|
||||||
|
if (a.isReference())
|
||||||
|
refQuals.add((double)el.getKey().getMappingQual());
|
||||||
|
else if (allAlleles.contains(a))
|
||||||
|
altQuals.add((double)el.getKey().getMappingQual());
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||||
|
|
@ -24,7 +25,12 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
@ -22,7 +23,12 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -20,7 +21,12 @@ import java.util.Map;
|
||||||
* The number of N bases, counting only SOLiD data
|
* The number of N bases, counting only SOLiD data
|
||||||
*/
|
*/
|
||||||
public class NBaseCount extends InfoFieldAnnotation {
|
public class NBaseCount extends InfoFieldAnnotation {
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if( stratifiedContexts.size() == 0 )
|
if( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -28,14 +29,24 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
if ( !vc.hasLog10PError() || stratifiedContexts.size() == 0 )
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||||
|
if ( !vc.hasLog10PError() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
final GenotypesContext genotypes = vc.getGenotypes();
|
final GenotypesContext genotypes = vc.getGenotypes();
|
||||||
if ( genotypes == null || genotypes.size() == 0 )
|
if ( genotypes == null || genotypes.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
if (stratifiedContexts != null && stratifiedContexts.size() == 0)
|
||||||
|
return null;
|
||||||
|
if (perReadAlleleLikelihoodMap != null && perReadAlleleLikelihoodMap.size() == 0)
|
||||||
|
return null;
|
||||||
|
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
|
|
||||||
for ( final Genotype genotype : genotypes ) {
|
for ( final Genotype genotype : genotypes ) {
|
||||||
|
|
@ -44,11 +55,20 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
||||||
if ( !genotype.isHet() && !genotype.isHomVar() )
|
if ( !genotype.isHet() && !genotype.isHomVar() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
if (stratifiedContexts!= null) {
|
||||||
if ( context == null )
|
AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||||
continue;
|
if ( context == null )
|
||||||
|
continue;
|
||||||
|
depth += context.getBasePileup().depthOfCoverage();
|
||||||
|
|
||||||
depth += context.getBasePileup().depthOfCoverage();
|
}
|
||||||
|
else if (perReadAlleleLikelihoodMap != null) {
|
||||||
|
PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||||
|
if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
depth += perReadAlleleLikelihoods.getLikelihoodReadMap().size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( depth == 0 )
|
if ( depth == 0 )
|
||||||
|
|
@ -67,39 +87,5 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
||||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
|
||||||
if ( stratifiedContexts.size() == 0 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
final GenotypesContext genotypes = vc.getGenotypes();
|
|
||||||
if ( genotypes == null || genotypes.size() == 0 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
int depth = 0;
|
|
||||||
|
|
||||||
for ( final Genotype genotype : genotypes ) {
|
|
||||||
|
|
||||||
// we care only about variant calls with likelihoods
|
|
||||||
if ( !genotype.isHet() && !genotype.isHomVar() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
final Map<Allele, List<GATKSAMRecord>> alleleBins = stratifiedContexts.get(genotype.getSampleName());
|
|
||||||
if ( alleleBins == null )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
|
||||||
depth += alleleBin.getValue().size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( depth == 0 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
double QD = -10.0 * vc.getLog10PError() / (double)depth;
|
|
||||||
|
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
|
||||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
|
@ -18,10 +19,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -29,25 +27,48 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
if ( stratifiedContexts.size() == 0 )
|
final AnnotatorCompatible walker,
|
||||||
return null;
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||||
|
int totalSize = 0, index = 0;
|
||||||
|
int qualities[];
|
||||||
|
if (stratifiedContexts != null) {
|
||||||
|
if ( stratifiedContexts.size() == 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
int totalSize = 0;
|
for ( AlignmentContext context : stratifiedContexts.values() )
|
||||||
for ( AlignmentContext context : stratifiedContexts.values() )
|
totalSize += context.size();
|
||||||
totalSize += context.size();
|
|
||||||
|
|
||||||
final int[] qualities = new int[totalSize];
|
qualities = new int[totalSize];
|
||||||
int index = 0;
|
|
||||||
|
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||||
AlignmentContext context = sample.getValue();
|
AlignmentContext context = sample.getValue();
|
||||||
final ReadBackedPileup pileup = context.getBasePileup();
|
for (PileupElement p : context.getBasePileup() )
|
||||||
for (PileupElement p : pileup ) {
|
index = fillMappingQualitiesFromPileupAndUpdateIndex(p, index, qualities);
|
||||||
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
|
||||||
qualities[index++] = p.getMappingQual();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (perReadAlleleLikelihoodMap != null) {
|
||||||
|
if ( perReadAlleleLikelihoodMap.size() == 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() )
|
||||||
|
totalSize += perReadLikelihoods.size();
|
||||||
|
|
||||||
|
qualities = new int[totalSize];
|
||||||
|
for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) {
|
||||||
|
for (PileupElement p : perReadLikelihoods.getStoredPileupElements())
|
||||||
|
index = fillMappingQualitiesFromPileupAndUpdateIndex(p, index, qualities);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return null;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
double rms = MathUtils.rms(qualities);
|
double rms = MathUtils.rms(qualities);
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
|
|
@ -55,32 +76,12 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
private static int fillMappingQualitiesFromPileupAndUpdateIndex(final PileupElement p, final int inputIdx, final int[] qualities) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
int outputIdx = inputIdx;
|
||||||
return null;
|
if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
||||||
|
qualities[outputIdx++] = p.getMappingQual();
|
||||||
|
|
||||||
int depth = 0;
|
return outputIdx;
|
||||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
|
||||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : alleleBins.entrySet() ) {
|
|
||||||
depth += alleleBin.getValue().size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final int[] qualities = new int[depth];
|
|
||||||
int index = 0;
|
|
||||||
|
|
||||||
for ( final Map<Allele, List<GATKSAMRecord>> alleleBins : stratifiedContexts.values() ) {
|
|
||||||
for ( final List<GATKSAMRecord> reads : alleleBins.values() ) {
|
|
||||||
for ( final GATKSAMRecord read : reads ) {
|
|
||||||
if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE )
|
|
||||||
qualities[index++] = read.getMappingQuality();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final Map<String, Object> map = new HashMap<String, Object>();
|
|
||||||
map.put(getKeyNames().get(0), String.format("%.2f", MathUtils.rms(qualities)));
|
|
||||||
return map;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
|
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
@ -28,12 +29,15 @@ import java.util.Map;
|
||||||
* Abstract root for all RankSum based annotations
|
* Abstract root for all RankSum based annotations
|
||||||
*/
|
*/
|
||||||
public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation {
|
public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation {
|
||||||
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
|
||||||
static final boolean DEBUG = false;
|
static final boolean DEBUG = false;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
if (stratifiedContexts.size() == 0)
|
final AnnotatorCompatible walker,
|
||||||
return null;
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
// either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null
|
||||||
|
|
||||||
final GenotypesContext genotypes = vc.getGenotypes();
|
final GenotypesContext genotypes = vc.getGenotypes();
|
||||||
if (genotypes == null || genotypes.size() == 0)
|
if (genotypes == null || genotypes.size() == 0)
|
||||||
|
|
@ -42,39 +46,24 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
||||||
final ArrayList<Double> refQuals = new ArrayList<Double>();
|
final ArrayList<Double> refQuals = new ArrayList<Double>();
|
||||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
||||||
|
|
||||||
if ( vc.isSNP() ) {
|
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
||||||
final List<Byte> altAlleles = new ArrayList<Byte>();
|
PerReadAlleleLikelihoodMap indelLikelihoodMap = null;
|
||||||
for ( final Allele a : vc.getAlternateAlleles() )
|
ReadBackedPileup pileup = null;
|
||||||
altAlleles.add(a.getBases()[0]);
|
if (stratifiedPerReadAlleleLikelihoodMap != null && !stratifiedPerReadAlleleLikelihoodMap.isEmpty()) {
|
||||||
|
indelLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());
|
||||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
if (indelLikelihoodMap == null)
|
||||||
|
continue;
|
||||||
|
if (indelLikelihoodMap.isEmpty())
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (stratifiedContexts != null) {
|
||||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
||||||
if ( context == null )
|
if ( context == null )
|
||||||
continue;
|
continue;
|
||||||
|
pileup = context.getBasePileup();
|
||||||
fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals);
|
|
||||||
}
|
}
|
||||||
} else if ( vc.isIndel() || vc.isMixed() ) {
|
fillQualsFromPileup(vc.getAlleles(), vc.getStart(), pileup, indelLikelihoodMap, refQuals, altQuals );
|
||||||
|
}
|
||||||
for (final Genotype genotype : genotypes.iterateInSampleNameOrder()) {
|
|
||||||
final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
|
|
||||||
if (context == null) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
final ReadBackedPileup pileup = context.getBasePileup();
|
|
||||||
if (pileup == null)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap() == null ||
|
|
||||||
IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().size() == 0)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
fillIndelQualsFromPileup(pileup, refQuals, altQuals);
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
return null;
|
|
||||||
|
|
||||||
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
|
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
|
||||||
for (final Double qual : altQuals) {
|
for (final Double qual : altQuals) {
|
||||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
|
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
|
||||||
|
|
@ -103,50 +92,12 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
protected abstract void fillQualsFromPileup(final List<Allele> alleles,
|
||||||
if (stratifiedContexts.size() == 0)
|
final int refLoc,
|
||||||
return null;
|
final ReadBackedPileup readBackedPileup,
|
||||||
|
final PerReadAlleleLikelihoodMap alleleLikelihoodMap,
|
||||||
final GenotypesContext genotypes = vc.getGenotypes();
|
final List<Double> refQuals,
|
||||||
if (genotypes == null || genotypes.size() == 0)
|
final List<Double> altQuals);
|
||||||
return null;
|
|
||||||
|
|
||||||
final ArrayList<Double> refQuals = new ArrayList<Double>();
|
|
||||||
final ArrayList<Double> altQuals = new ArrayList<Double>();
|
|
||||||
|
|
||||||
for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) {
|
|
||||||
final Map<Allele, List<GATKSAMRecord>> context = stratifiedContexts.get(genotype.getSampleName());
|
|
||||||
if ( context == null )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), vc.getStart(), context, refQuals, altQuals);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( refQuals.size() == 0 || altQuals.size() == 0 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
final MannWhitneyU mannWhitneyU = new MannWhitneyU();
|
|
||||||
for (final Double qual : altQuals) {
|
|
||||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET1);
|
|
||||||
}
|
|
||||||
for (final Double qual : refQuals) {
|
|
||||||
mannWhitneyU.add(qual, MannWhitneyU.USet.SET2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
|
|
||||||
final Pair<Double, Double> testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1);
|
|
||||||
|
|
||||||
final Map<String, Object> map = new HashMap<String, Object>();
|
|
||||||
if (!Double.isNaN(testResults.first))
|
|
||||||
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected abstract void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, List<Double> altQuals);
|
|
||||||
|
|
||||||
protected abstract void fillQualsFromPileup(final byte ref, final List<Byte> alts, final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
|
|
||||||
|
|
||||||
protected abstract void fillIndelQualsFromPileup(final ReadBackedPileup pileup, final List<Double> refQuals, final List<Double> altQuals);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Can the base in this pileup element be used in comparative tests between ref / alt bases?
|
* Can the base in this pileup element be used in comparative tests between ref / alt bases?
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -32,98 +33,55 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
|
||||||
return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"));
|
return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void fillQualsFromPileup(byte ref, List<Byte> alts, ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
protected void fillQualsFromPileup(final List<Allele> allAlleles,
|
||||||
for (final PileupElement p : pileup) {
|
final int refLoc,
|
||||||
if (isUsableBase(p)) {
|
final ReadBackedPileup pileup,
|
||||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
final PerReadAlleleLikelihoodMap alleleLikelihoodMap,
|
||||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead());
|
final List<Double> refQuals, final List<Double> altQuals) {
|
||||||
if (readPos > numAlignedBases / 2)
|
|
||||||
readPos = numAlignedBases - (readPos + 1);
|
|
||||||
|
|
||||||
|
if (alleleLikelihoodMap == null) {
|
||||||
|
// use fast SNP-based version if we don't have per-read allele likelihoods
|
||||||
|
for ( final PileupElement p : pileup ) {
|
||||||
|
if ( isUsableBase(p) ) {
|
||||||
|
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);
|
||||||
|
|
||||||
if ( p.getBase() == ref )
|
readPos = getFinalReadPosition(p.getRead(),readPos);
|
||||||
refQuals.add((double) readPos);
|
|
||||||
else if ( alts.contains(p.getBase()) )
|
|
||||||
altQuals.add((double) readPos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void fillQualsFromPileup(final Allele ref, final List<Allele> alts, final int refLoc, final Map<Allele, List<GATKSAMRecord>> stratifiedContext, final List<Double> refQuals, final List<Double> altQuals) {
|
if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) {
|
||||||
for ( final Map.Entry<Allele, List<GATKSAMRecord>> alleleBin : stratifiedContext.entrySet() ) {
|
refQuals.add((double)readPos);
|
||||||
final boolean matchesRef = ref.equals(alleleBin.getKey());
|
} else if ( allAlleles.contains(Allele.create(p.getBase()))) {
|
||||||
final boolean matchesAlt = alts.contains(alleleBin.getKey());
|
altQuals.add((double)readPos);
|
||||||
if ( !matchesRef && !matchesAlt )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for ( final GATKSAMRecord read : alleleBin.getValue() ) {
|
|
||||||
final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true );
|
|
||||||
if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED )
|
|
||||||
continue;
|
|
||||||
int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 );
|
|
||||||
|
|
||||||
final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read );
|
|
||||||
if (readPos > numAlignedBases / 2)
|
|
||||||
readPos = numAlignedBases - (readPos + 1);
|
|
||||||
|
|
||||||
if ( matchesRef )
|
|
||||||
refQuals.add((double) readPos);
|
|
||||||
else
|
|
||||||
altQuals.add((double) readPos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List<Double> refQuals, List<Double> altQuals) {
|
|
||||||
// equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele
|
|
||||||
// to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element.
|
|
||||||
// A pileup element then has a list of pairs of form (Allele, likelihood of this allele).
|
|
||||||
// To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles.
|
|
||||||
// If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pielup element is "ref"
|
|
||||||
// otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt"
|
|
||||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
|
||||||
for (final PileupElement p : pileup) {
|
|
||||||
if (indelLikelihoodMap.containsKey(p)) {
|
|
||||||
LinkedHashMap<Allele, Double> el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read
|
|
||||||
double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele
|
|
||||||
|
|
||||||
for (Map.Entry<Allele,Double> a : el.entrySet()) {
|
|
||||||
if (a.getKey().isReference())
|
|
||||||
refLikelihood = a.getValue();
|
|
||||||
else {
|
|
||||||
double like = a.getValue();
|
|
||||||
if (like >= altLikelihood)
|
|
||||||
altLikelihood = like;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset());
|
|
||||||
final int numAlignedBases = getNumAlignedBases(p.getRead());
|
|
||||||
|
|
||||||
if (readPos > numAlignedBases / 2) {
|
|
||||||
readPos = numAlignedBases - (readPos + 1);
|
|
||||||
}
|
|
||||||
//if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases);
|
|
||||||
|
|
||||||
|
|
||||||
// if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads
|
|
||||||
// where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping.
|
|
||||||
// if (readPos < -1)
|
|
||||||
// return;
|
|
||||||
if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
|
||||||
refQuals.add((double) readPos);
|
|
||||||
//if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
|
||||||
} else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) {
|
|
||||||
altQuals.add((double) readPos);
|
|
||||||
//if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Map.Entry<PileupElement,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
|
int readPos = getOffsetFromClippedReadStart(el.getKey().getRead(), el.getKey().getOffset());
|
||||||
|
readPos = getFinalReadPosition(el.getKey().getRead(),readPos);
|
||||||
|
|
||||||
|
final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||||
|
if (a.isNoCall())
|
||||||
|
continue; // read is non-informative
|
||||||
|
if (a.isReference())
|
||||||
|
refQuals.add((double)readPos);
|
||||||
|
else if (allAlleles.contains(a))
|
||||||
|
altQuals.add((double)readPos);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getFinalReadPosition(GATKSAMRecord read, int initialReadPosition) {
|
||||||
|
final int numAlignedBases = getNumAlignedBases(read);
|
||||||
|
|
||||||
|
int readPos = initialReadPosition;
|
||||||
|
if (initialReadPosition > numAlignedBases / 2) {
|
||||||
|
readPos = numAlignedBases - (initialReadPosition + 1);
|
||||||
|
}
|
||||||
|
return readPos;
|
||||||
|
|
||||||
|
}
|
||||||
int getNumClippedBasesAtStart(SAMRecord read) {
|
int getNumClippedBasesAtStart(SAMRecord read) {
|
||||||
// compute total number of clipped bases (soft or hard clipped)
|
// compute total number of clipped bases (soft or hard clipped)
|
||||||
// check for hard clips (never consider these bases):
|
// check for hard clips (never consider these bases):
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -46,7 +47,12 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class SampleList extends InfoFieldAnnotation {
|
public class SampleList extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() )
|
if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -225,7 +226,12 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||||
|
|
||||||
// Get only SnpEff records that start at this locus, not merely span it:
|
// Get only SnpEff records that start at this locus, not merely span it:
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
@ -22,7 +23,12 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -47,7 +48,12 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
||||||
private static final String STR_PRESENT = "STR";
|
private static final String STR_PRESENT = "STR";
|
||||||
private static final String REPEAT_UNIT_KEY = "RU";
|
private static final String REPEAT_UNIT_KEY = "RU";
|
||||||
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
|
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( !vc.isIndel())
|
if ( !vc.isIndel())
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
|
@ -28,7 +29,12 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi
|
||||||
private String n454 ="Num454";
|
private String n454 ="Num454";
|
||||||
private String nSolid = "NumSOLiD";
|
private String nSolid = "NumSOLiD";
|
||||||
private String nOther = "NumOther";
|
private String nOther = "NumOther";
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
|
@ -28,7 +29,12 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
||||||
private Set<Sample> trios = null;
|
private Set<Sample> trios = null;
|
||||||
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
|
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( trios == null ) {
|
if ( trios == null ) {
|
||||||
if ( walker instanceof VariantAnnotator ) {
|
if ( walker instanceof VariantAnnotator ) {
|
||||||
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
|
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -178,7 +179,18 @@ public class VariantAnnotatorEngine {
|
||||||
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
|
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
VariantContext vc) {
|
||||||
|
return annotateContext(tracker, ref, stratifiedContexts, vc, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public VariantContext annotateContext(final RefMetaDataTracker tracker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
VariantContext vc,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||||
|
|
||||||
// annotate db occurrences
|
// annotate db occurrences
|
||||||
|
|
@ -189,7 +201,7 @@ public class VariantAnnotatorEngine {
|
||||||
|
|
||||||
// go through all the requested info annotationTypes
|
// go through all the requested info annotationTypes
|
||||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||||
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc);
|
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap);
|
||||||
if ( annotationsFromCurrentType != null )
|
if ( annotationsFromCurrentType != null )
|
||||||
infoAnnotations.putAll(annotationsFromCurrentType);
|
infoAnnotations.putAll(annotationsFromCurrentType);
|
||||||
}
|
}
|
||||||
|
|
@ -201,7 +213,7 @@ public class VariantAnnotatorEngine {
|
||||||
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make();
|
return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext annotateContext(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
public VariantContext annotateContext(final Map<String, PerReadAlleleLikelihoodMap> stratifiedContexts, VariantContext vc) {
|
||||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||||
|
|
||||||
// go through all the requested info annotationTypes
|
// go through all the requested info annotationTypes
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
@ -10,8 +11,8 @@ import java.util.Map;
|
||||||
|
|
||||||
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
|
// TODO -- make this an abstract class when we move away from InfoFieldAnnotation
|
||||||
public interface ActiveRegionBasedAnnotation extends AnnotationType {
|
public interface ActiveRegionBasedAnnotation extends AnnotationType {
|
||||||
// return annotations for the given contexts split by sample and then allele
|
// return annotations for the given contexts split by sample and then read likelihood
|
||||||
public abstract Map<String, Object> annotate(final Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, final VariantContext vc);
|
public abstract Map<String, Object> annotate(final Map<String,PerReadAlleleLikelihoodMap> stratifiedContexts, final VariantContext vc);
|
||||||
|
|
||||||
// return the descriptions used for the VCF INFO meta field
|
// return the descriptions used for the VCF INFO meta field
|
||||||
public abstract List<VCFInfoHeaderLine> getDescriptions();
|
public abstract List<VCFInfoHeaderLine> getDescriptions();
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
|
@ -11,8 +12,25 @@ import java.util.Map;
|
||||||
|
|
||||||
public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation {
|
public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation {
|
||||||
// return annotations for the given contexts split by sample
|
// return annotations for the given contexts split by sample
|
||||||
public abstract Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc);
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc) {
|
||||||
|
return annotate(tracker, walker, ref, stratifiedContexts, vc, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, Object> annotate(Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap, VariantContext vc) {
|
||||||
|
return annotate(null, null, null, null, vc, perReadAlleleLikelihoodMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public abstract Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
final AnnotatorCompatible walker,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap);
|
||||||
|
|
||||||
// return the descriptions used for the VCF INFO meta field
|
// return the descriptions used for the VCF INFO meta field
|
||||||
public abstract List<VCFInfoHeaderLine> getDescriptions();
|
public abstract List<VCFInfoHeaderLine> getDescriptions();
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
||||||
final AlignmentContextUtils.ReadOrientation contextType,
|
final AlignmentContextUtils.ReadOrientation contextType,
|
||||||
final List<Allele> allAllelesToUse,
|
final List<Allele> allAllelesToUse,
|
||||||
final boolean useBAQedPileup,
|
final boolean useBAQedPileup,
|
||||||
final GenomeLocParser locParser);
|
final GenomeLocParser locParser,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap);
|
||||||
|
|
||||||
|
|
||||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||||
|
|
@ -115,4 +116,5 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -48,24 +48,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||||
private PairHMMIndelErrorModel pairModel;
|
private PairHMMIndelErrorModel pairModel;
|
||||||
|
|
||||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
|
||||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
|
||||||
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
|
|
||||||
return new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
private LinkedHashMap<Allele, Haplotype> haplotypeMap;
|
private LinkedHashMap<Allele, Haplotype> haplotypeMap;
|
||||||
|
|
||||||
// gdebug removeme
|
|
||||||
// todo -cleanup
|
|
||||||
private GenomeLoc lastSiteVisited;
|
|
||||||
private List<Allele> alleleList = new ArrayList<Allele>();
|
private List<Allele> alleleList = new ArrayList<Allele>();
|
||||||
|
|
||||||
static {
|
|
||||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||||
super(UAC, logger);
|
super(UAC, logger);
|
||||||
|
|
@ -93,16 +80,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
final AlignmentContextUtils.ReadOrientation contextType,
|
final AlignmentContextUtils.ReadOrientation contextType,
|
||||||
final List<Allele> allAllelesToUse,
|
final List<Allele> allAllelesToUse,
|
||||||
final boolean useBAQedPileup,
|
final boolean useBAQedPileup,
|
||||||
final GenomeLocParser locParser) {
|
final GenomeLocParser locParser,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
GenomeLoc loc = ref.getLocus();
|
GenomeLoc loc = ref.getLocus();
|
||||||
// if (!ref.getLocus().equals(lastSiteVisited)) {
|
// if (!ref.getLocus().equals(lastSiteVisited)) {
|
||||||
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) {
|
||||||
// starting a new site: clear allele list
|
// starting a new site: clear allele list
|
||||||
lastSiteVisited = ref.getLocus();
|
|
||||||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
|
||||||
haplotypeMap.clear();
|
haplotypeMap.clear();
|
||||||
|
perReadAlleleLikelihoodMap.clear(); // clean mapping sample-> per read, per allele likelihoods
|
||||||
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||||
if (alleleList.isEmpty())
|
if (alleleList.isEmpty())
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -130,10 +116,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
||||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||||
|
|
||||||
|
if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){
|
||||||
|
// no likelihoods have been computed for this sample at this site
|
||||||
|
perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap());
|
||||||
|
}
|
||||||
final ReadBackedPileup pileup = context.getBasePileup();
|
final ReadBackedPileup pileup = context.getBasePileup();
|
||||||
if (pileup != null) {
|
if (pileup != null) {
|
||||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()));
|
||||||
b.PL(genotypeLikelihoods);
|
b.PL(genotypeLikelihoods);
|
||||||
b.DP(getFilteredDepth(pileup));
|
b.DP(getFilteredDepth(pileup));
|
||||||
genotypes.add(b.make());
|
genotypes.add(b.make());
|
||||||
|
|
@ -150,10 +140,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
return builder.genotypes(genotypes).make();
|
return builder.genotypes(genotypes).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
|
|
||||||
return indelLikelihoodMap.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final GenomeLoc loc,
|
final GenomeLoc loc,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,128 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
|
||||||
|
//import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class PerReadAlleleLikelihoodMap {
|
||||||
|
public static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||||
|
|
||||||
|
private List<Allele> alleles;
|
||||||
|
private Map<PileupElement,Map<Allele,Double>> likelihoodReadMap;
|
||||||
|
public PerReadAlleleLikelihoodMap() {
|
||||||
|
likelihoodReadMap = new LinkedHashMap<PileupElement,Map<Allele,Double>>();
|
||||||
|
alleles = new ArrayList<Allele>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void add(PileupElement p, Allele a, Double likelihood) {
|
||||||
|
Map<Allele,Double> likelihoodMap;
|
||||||
|
if (likelihoodReadMap.containsKey(p)){
|
||||||
|
// seen pileup element before
|
||||||
|
likelihoodMap = likelihoodReadMap.get(p);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
likelihoodMap = new HashMap<Allele, Double>();
|
||||||
|
likelihoodReadMap.put(p,likelihoodMap);
|
||||||
|
}
|
||||||
|
likelihoodMap.put(a,likelihood);
|
||||||
|
|
||||||
|
if (!alleles.contains(a))
|
||||||
|
alleles.add(a);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return likelihoodReadMap.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void add(GATKSAMRecord read, Allele a, Double likelihood) {
|
||||||
|
PileupElement p = new PileupElement(read,-1,false,false,false,false,false,false);
|
||||||
|
add(p,a,likelihood);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean containsPileupElement(PileupElement p) {
|
||||||
|
return likelihoodReadMap.containsKey(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return likelihoodReadMap.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<PileupElement,Map<Allele,Double>> getLikelihoodReadMap() {
|
||||||
|
return likelihoodReadMap;
|
||||||
|
}
|
||||||
|
public void clear() {
|
||||||
|
alleles.clear();
|
||||||
|
likelihoodReadMap.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<PileupElement> getStoredPileupElements() {
|
||||||
|
return likelihoodReadMap.keySet();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Returns list of reads greedily associated with a particular allele.
|
||||||
|
* Needs to loop for each read, and assign to each allele
|
||||||
|
* @param a Desired allele
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Requires("a!=null")
|
||||||
|
public List<GATKSAMRecord> getReadsAssociatedWithAllele(Allele a) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<Allele,Double> getLikelihoodsAssociatedWithPileupElement(PileupElement p) {
|
||||||
|
if (!likelihoodReadMap.containsKey(p))
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return likelihoodReadMap.get(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Allele getMostLikelyAllele(Map<Allele,Double> alleleMap) {
|
||||||
|
double minLike = Double.POSITIVE_INFINITY, maxLike = Double.NEGATIVE_INFINITY;
|
||||||
|
Allele mostLikelyAllele = Allele.NO_CALL;
|
||||||
|
|
||||||
|
for (Map.Entry<Allele,Double> el : alleleMap.entrySet()) {
|
||||||
|
if (el.getValue() > maxLike) {
|
||||||
|
maxLike = el.getValue();
|
||||||
|
mostLikelyAllele = el.getKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (el.getValue() < minLike)
|
||||||
|
minLike = el.getValue();
|
||||||
|
|
||||||
|
}
|
||||||
|
if (maxLike-minLike > INDEL_LIKELIHOOD_THRESH)
|
||||||
|
return mostLikelyAllele;
|
||||||
|
else
|
||||||
|
return Allele.NO_CALL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -62,7 +62,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
final AlignmentContextUtils.ReadOrientation contextType,
|
final AlignmentContextUtils.ReadOrientation contextType,
|
||||||
final List<Allele> allAllelesToUse,
|
final List<Allele> allAllelesToUse,
|
||||||
final boolean useBAQedPileup,
|
final boolean useBAQedPileup,
|
||||||
final GenomeLocParser locParser) {
|
final GenomeLocParser locParser,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
|
perReadAlleleLikelihoodMap.clear(); // not used in SNP model, sanity check to delete any older data
|
||||||
|
|
||||||
final byte refBase = ref.getBase();
|
final byte refBase = ref.getBase();
|
||||||
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||||
|
|
|
||||||
|
|
@ -177,19 +177,23 @@ public class UnifiedGenotyperEngine {
|
||||||
final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);
|
final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);
|
||||||
|
|
||||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||||
|
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<String,PerReadAlleleLikelihoodMap>();
|
||||||
|
|
||||||
if ( models.isEmpty() ) {
|
if ( models.isEmpty() ) {
|
||||||
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
|
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
|
||||||
|
perReadAlleleLikelihoodMap.clear();
|
||||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||||
if ( stratifiedContexts == null ) {
|
if ( stratifiedContexts == null ) {
|
||||||
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null);
|
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||||
if ( vc != null )
|
if ( vc != null )
|
||||||
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true));
|
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -219,9 +223,13 @@ public class UnifiedGenotyperEngine {
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
* @param refContext the reference base
|
* @param refContext the reference base
|
||||||
* @param rawContext contextual information around the locus
|
* @param rawContext contextual information around the locus
|
||||||
|
* @param perReadAlleleLikelihoodMap Map to store per-sample, per-read, per-allele likelihoods (only used for indels)
|
||||||
* @return the VariantContext object
|
* @return the VariantContext object
|
||||||
*/
|
*/
|
||||||
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
public VariantContext calculateLikelihoods(final RefMetaDataTracker tracker,
|
||||||
|
final ReferenceContext refContext,
|
||||||
|
final AlignmentContext rawContext,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||||
if ( models.isEmpty() ) {
|
if ( models.isEmpty() ) {
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -231,7 +239,7 @@ public class UnifiedGenotyperEngine {
|
||||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||||
// return the first valid one we encounter
|
// return the first valid one we encounter
|
||||||
if ( stratifiedContexts != null )
|
if ( stratifiedContexts != null )
|
||||||
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -247,7 +255,11 @@ public class UnifiedGenotyperEngine {
|
||||||
* @param vc the GL-annotated variant context
|
* @param vc the GL-annotated variant context
|
||||||
* @return the VariantCallContext object
|
* @return the VariantCallContext object
|
||||||
*/
|
*/
|
||||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
|
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker,
|
||||||
|
final ReferenceContext refContext,
|
||||||
|
final AlignmentContext rawContext,
|
||||||
|
final VariantContext vc,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||||
if ( models.isEmpty() ) {
|
if ( models.isEmpty() ) {
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -256,25 +268,37 @@ public class UnifiedGenotyperEngine {
|
||||||
// return the first one
|
// return the first one
|
||||||
final GenotypeLikelihoodsCalculationModel.Model model = models.get(0);
|
final GenotypeLikelihoodsCalculationModel.Model model = models.get(0);
|
||||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
|
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, perReadAlleleLikelihoodMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker,
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
final ReferenceContext refContext,
|
||||||
|
final AlignmentContext rawContext,
|
||||||
|
final VariantContext vc) {
|
||||||
|
return calculateGenotypes(tracker, refContext, rawContext, vc, null);
|
||||||
|
}
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Private implementation helpers
|
// Private implementation helpers
|
||||||
//
|
//
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
// private method called by both UnifiedGenotyper and UGCalcLikelihoods entry points into the engine
|
// private method called by both UnifiedGenotyper and UGCalcLikelihoods entry points into the engine
|
||||||
private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map<String, AlignmentContext> stratifiedContexts, AlignmentContextUtils.ReadOrientation type, List<Allele> alternateAllelesToUse, boolean useBAQedPileup, final GenotypeLikelihoodsCalculationModel.Model model) {
|
private VariantContext calculateLikelihoods(final RefMetaDataTracker tracker,
|
||||||
|
final ReferenceContext refContext,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final AlignmentContextUtils.ReadOrientation type,
|
||||||
|
final List<Allele> alternateAllelesToUse,
|
||||||
|
final boolean useBAQedPileup,
|
||||||
|
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
// initialize the data for this thread if that hasn't been done yet
|
// initialize the data for this thread if that hasn't been done yet
|
||||||
if ( glcm.get() == null ) {
|
if ( glcm.get() == null ) {
|
||||||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||||
}
|
}
|
||||||
|
|
||||||
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||||
|
|
@ -305,12 +329,22 @@ public class UnifiedGenotyperEngine {
|
||||||
return new VariantCallContext(vc, false);
|
return new VariantCallContext(vc, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
return calculateGenotypes(null, null, null, null, vc, model);
|
return calculateGenotypes(null, null, null, null, vc, model, perReadAlleleLikelihoodMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false);
|
return calculateGenotypes(null, null, null, null, vc, model, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker,
|
||||||
|
final ReferenceContext refContext,
|
||||||
|
final AlignmentContext rawContext,
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc,
|
||||||
|
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
|
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false,perReadAlleleLikelihoodMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -324,8 +358,11 @@ public class UnifiedGenotyperEngine {
|
||||||
* @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
|
* @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
|
||||||
* @return VC with assigned genotypes
|
* @return VC with assigned genotypes
|
||||||
*/
|
*/
|
||||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||||
final boolean inheritAttributesFromInputVC) {
|
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
||||||
|
final boolean inheritAttributesFromInputVC,
|
||||||
|
final Map<String,PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||||
|
|
||||||
|
|
@ -451,7 +488,7 @@ public class UnifiedGenotyperEngine {
|
||||||
List<Allele> allAllelesToUse = builder.make().getAlleles();
|
List<Allele> allAllelesToUse = builder.make().getAlleles();
|
||||||
|
|
||||||
// the forward lod
|
// the forward lod
|
||||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model);
|
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||||
AFresult.reset();
|
AFresult.reset();
|
||||||
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
||||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||||
|
|
@ -460,7 +497,7 @@ public class UnifiedGenotyperEngine {
|
||||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||||
|
|
||||||
// the reverse lod
|
// the reverse lod
|
||||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model);
|
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||||
AFresult.reset();
|
AFresult.reset();
|
||||||
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
||||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||||
|
|
@ -496,7 +533,7 @@ public class UnifiedGenotyperEngine {
|
||||||
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||||
|
|
||||||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
|
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.PairHMM;
|
import org.broadinstitute.sting.utils.PairHMM;
|
||||||
|
|
@ -40,6 +41,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class PairHMMIndelErrorModel {
|
public class PairHMMIndelErrorModel {
|
||||||
|
|
@ -167,11 +169,15 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public synchronized double[] computeDiploidReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele, Haplotype> haplotypeMap, ReferenceContext ref, int eventLength, HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap){
|
public synchronized double[] computeDiploidReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
|
||||||
|
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||||
|
final ReferenceContext ref,
|
||||||
|
final int eventLength,
|
||||||
|
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){
|
||||||
final int numHaplotypes = haplotypeMap.size();
|
final int numHaplotypes = haplotypeMap.size();
|
||||||
|
|
||||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||||
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, indelLikelihoodMap, readCounts);
|
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap, readCounts);
|
||||||
return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -181,7 +187,7 @@ public class PairHMMIndelErrorModel {
|
||||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final int eventLength,
|
final int eventLength,
|
||||||
final HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap,
|
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap,
|
||||||
final int[] readCounts) {
|
final int[] readCounts) {
|
||||||
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()];
|
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()];
|
||||||
final PairHMM pairHMM = new PairHMM(bandedLikelihoods);
|
final PairHMM pairHMM = new PairHMM(bandedLikelihoods);
|
||||||
|
|
@ -192,8 +198,8 @@ public class PairHMMIndelErrorModel {
|
||||||
readCounts[readIdx] = p.getRepresentativeCount();
|
readCounts[readIdx] = p.getRepresentativeCount();
|
||||||
|
|
||||||
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
||||||
if (indelLikelihoodMap.containsKey(p)) {
|
if (perReadAlleleLikelihoodMap.containsPileupElement(p)) {
|
||||||
HashMap<Allele,Double> el = indelLikelihoodMap.get(p);
|
Map<Allele,Double> el = perReadAlleleLikelihoodMap.getLikelihoodsAssociatedWithPileupElement(p);
|
||||||
int j=0;
|
int j=0;
|
||||||
for (Allele a: haplotypeMap.keySet()) {
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
readLikelihoods[readIdx][j++] = el.get(a);
|
readLikelihoods[readIdx][j++] = el.get(a);
|
||||||
|
|
@ -201,7 +207,7 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
final int refWindowStart = ref.getWindow().getStart();
|
final int refWindowStart = ref.getWindow().getStart();
|
||||||
final int refWindowStop = ref.getWindow().getStop();
|
final int refWindowStop = ref.getWindow().getStop();
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
|
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
|
||||||
|
|
@ -280,7 +286,7 @@ public class PairHMMIndelErrorModel {
|
||||||
System.out.format("numStartSoftClippedBases: %d numEndSoftClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
|
System.out.format("numStartSoftClippedBases: %d numEndSoftClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n",
|
||||||
numStartSoftClippedBases, numEndSoftClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());
|
numStartSoftClippedBases, numEndSoftClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength());
|
||||||
|
|
||||||
LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
|
// LinkedHashMap<Allele,Double> readEl = new LinkedHashMap<Allele,Double>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if we'll end up with an empty read once all clipping is done
|
* Check if we'll end up with an empty read once all clipping is done
|
||||||
|
|
@ -288,7 +294,7 @@ public class PairHMMIndelErrorModel {
|
||||||
if (numStartSoftClippedBases + numEndSoftClippedBases >= unclippedReadBases.length) {
|
if (numStartSoftClippedBases + numEndSoftClippedBases >= unclippedReadBases.length) {
|
||||||
int j=0;
|
int j=0;
|
||||||
for (Allele a: haplotypeMap.keySet()) {
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
readEl.put(a,0.0);
|
perReadAlleleLikelihoodMap.add(p,a,0.0);
|
||||||
readLikelihoods[readIdx][j++] = 0.0;
|
readLikelihoods[readIdx][j++] = 0.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -329,45 +335,45 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||||
(int)indStart, (int)indStop);
|
(int)indStart, (int)indStop);
|
||||||
|
|
||||||
final int X_METRIC_LENGTH = readBases.length+2;
|
final int X_METRIC_LENGTH = readBases.length+2;
|
||||||
final int Y_METRIC_LENGTH = haplotypeBases.length+2;
|
final int Y_METRIC_LENGTH = haplotypeBases.length+2;
|
||||||
|
|
||||||
if (matchMetricArray == null) {
|
if (matchMetricArray == null) {
|
||||||
//no need to reallocate arrays for each new haplotype, as length won't change
|
//no need to reallocate arrays for each new haplotype, as length won't change
|
||||||
matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
|
|
||||||
|
|
||||||
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||||
}
|
}
|
||||||
|
|
||||||
int startIndexInHaplotype = 0;
|
int startIndexInHaplotype = 0;
|
||||||
if (previousHaplotypeSeen != null)
|
if (previousHaplotypeSeen != null)
|
||||||
startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
|
startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen);
|
||||||
previousHaplotypeSeen = haplotypeBases.clone();
|
previousHaplotypeSeen = haplotypeBases.clone();
|
||||||
|
|
||||||
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
|
readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals,
|
||||||
(read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
|
(read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities),
|
||||||
(read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
|
(read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities),
|
||||||
contextLogGapContinuationProbabilities,
|
contextLogGapContinuationProbabilities,
|
||||||
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
|
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
|
||||||
|
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println("H:"+new String(haplotypeBases));
|
System.out.println("H:"+new String(haplotypeBases));
|
||||||
System.out.println("R:"+new String(readBases));
|
System.out.println("R:"+new String(readBases));
|
||||||
System.out.format("L:%4.2f\n",readLikelihood);
|
System.out.format("L:%4.2f\n",readLikelihood);
|
||||||
System.out.format("StPos:%d\n", startIndexInHaplotype);
|
System.out.format("StPos:%d\n", startIndexInHaplotype);
|
||||||
}
|
}
|
||||||
readEl.put(a,readLikelihood);
|
|
||||||
|
perReadAlleleLikelihoodMap.add(p, a, readLikelihood);
|
||||||
readLikelihoods[readIdx][j++] = readLikelihood;
|
readLikelihoods[readIdx][j++] = readLikelihood;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
indelLikelihoodMap.put(p,readEl);
|
|
||||||
}
|
}
|
||||||
readIdx++;
|
readIdx++;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -625,6 +625,10 @@ public class MathUtils {
|
||||||
return maxElementIndex(array, array.length);
|
return maxElementIndex(array, array.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int maxElementIndex(final byte[] array) {
|
||||||
|
return maxElementIndex(array, array.length);
|
||||||
|
}
|
||||||
|
|
||||||
public static int maxElementIndex(final int[] array, int endIndex) {
|
public static int maxElementIndex(final int[] array, int endIndex) {
|
||||||
if (array == null || array.length == 0)
|
if (array == null || array.length == 0)
|
||||||
throw new IllegalArgumentException("Array cannot be null!");
|
throw new IllegalArgumentException("Array cannot be null!");
|
||||||
|
|
@ -638,6 +642,24 @@ public class MathUtils {
|
||||||
return maxI;
|
return maxI;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static int maxElementIndex(final byte[] array, int endIndex) {
|
||||||
|
if (array == null || array.length == 0)
|
||||||
|
throw new IllegalArgumentException("Array cannot be null!");
|
||||||
|
|
||||||
|
int maxI = 0;
|
||||||
|
for (int i = 1; i < endIndex; i++) {
|
||||||
|
if (array[i] > array[maxI])
|
||||||
|
maxI = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
return maxI;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte arrayMax(final byte[] array) {
|
||||||
|
return array[maxElementIndex(array)];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public static double arrayMax(final double[] array) {
|
public static double arrayMax(final double[] array) {
|
||||||
return array[maxElementIndex(array)];
|
return array[maxElementIndex(array)];
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue