parent
25fa25b618
commit
9d32dec9cd
|
|
@ -61,6 +61,9 @@ import java.util.List;
|
|||
|
||||
public class GenotypeCalculationArgumentCollection implements Cloneable{
|
||||
|
||||
|
||||
public static final String MAX_ALTERNATE_ALLELES_SHORT_NAME = "maxAltAlleles";
|
||||
|
||||
/**
|
||||
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
|
||||
* Using this argument instructs the genotyper to annotate (in the INFO field) the number of alternate alleles that were originally discovered at the site.
|
||||
|
|
@ -122,7 +125,7 @@ public class GenotypeCalculationArgumentCollection implements Cloneable{
|
|||
* As of GATK 2.2 the genotyper can handle a very large number of events, so the default maximum has been increased to 6.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
@Argument(fullName = "max_alternate_alleles", shortName = MAX_ALTERNATE_ALLELES_SHORT_NAME, doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||
public int MAX_ALTERNATE_ALLELES = 6;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -54,9 +54,13 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
|
|||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleListUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Genotyping Likelihoods collection.
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class VariantCallContext extends VariantContext {
|
|||
// Should this site be emitted?
|
||||
public boolean shouldEmit = true;
|
||||
|
||||
VariantCallContext(VariantContext vc, boolean confidentlyCalledP) {
|
||||
public VariantCallContext(VariantContext vc, boolean confidentlyCalledP) {
|
||||
super(vc);
|
||||
this.confidentlyCalled = confidentlyCalledP;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,6 +53,9 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.SimpleTimer;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.GenotypeBuilder;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
|
|
@ -128,6 +131,7 @@ public abstract class AFCalculator implements Cloneable {
|
|||
* @return result (for programming convenience)
|
||||
*/
|
||||
public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int defaultPloidy, final int maximumAlternativeAlleles, final double[] log10AlleleFrequencyPriors) {
|
||||
|
||||
if ( vc == null ) throw new IllegalArgumentException("VariantContext cannot be null");
|
||||
if ( vc.getNAlleles() == 1 ) throw new IllegalArgumentException("VariantContext has only a single reference allele, but getLog10PNonRef requires at least one at all " + vc);
|
||||
if ( log10AlleleFrequencyPriors == null ) throw new IllegalArgumentException("priors vector cannot be null");
|
||||
|
|
@ -135,6 +139,9 @@ public abstract class AFCalculator implements Cloneable {
|
|||
// reset the result, so we can store our new result there
|
||||
final StateTracker stateTracker = getStateTracker(true,maximumAlternativeAlleles);
|
||||
|
||||
//TODO All implementations of the reduce-scope seems to employ a bad criterion to
|
||||
//TODO decide what alleles to keep. This must be changed eventually.
|
||||
//TODO issue {@see https://github.com/broadinstitute/gsa-unstable/issues/1376}
|
||||
final VariantContext vcWorking = reduceScope(vc,defaultPloidy, maximumAlternativeAlleles);
|
||||
|
||||
callTimer.start();
|
||||
|
|
|
|||
|
|
@ -51,19 +51,19 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.samtools.util.StringUtil;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.haplotype.EventMap;
|
||||
|
|
@ -82,6 +82,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
protected static final int ALLELE_EXTENSION = 2;
|
||||
private static final String phase01 = "0|1";
|
||||
private static final String phase10 = "1|0";
|
||||
private static final int MAX_DROPPED_ALTERNATIVE_ALLELES_TO_LOG = 20;
|
||||
|
||||
private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
|
||||
|
||||
|
|
@ -188,7 +189,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
@Ensures("result != null")
|
||||
// TODO - can this be refactored? this is hard to follow!
|
||||
public CalledHaplotypes assignGenotypeLikelihoods( final List<Haplotype> haplotypes,
|
||||
CalledHaplotypes assignGenotypeLikelihoods( final List<Haplotype> haplotypes,
|
||||
final ReadLikelihoods<Haplotype> readLikelihoods,
|
||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final byte[] ref,
|
||||
|
|
@ -241,9 +242,6 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
|
||||
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
|
||||
if (emitReferenceConfidence)
|
||||
mergedVC = addNonRefSymbolicAllele(mergedVC);
|
||||
|
||||
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
|
||||
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
|
||||
for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
|
||||
|
|
@ -256,39 +254,25 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
|
||||
}
|
||||
|
||||
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
|
||||
final ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
|
||||
if (configuration.isSampleContaminationPresent())
|
||||
readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination());
|
||||
|
||||
final boolean someAllelesWereDropped = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES < readAlleleLikelihoods.alleleCount() - 1;
|
||||
|
||||
if (emitReferenceConfidence)
|
||||
if (someAllelesWereDropped) {
|
||||
reduceNumberOfAlternativeAllelesBasedOnLikelihoods(readAlleleLikelihoods, genomeLocParser.createGenomeLoc(mergedVC));
|
||||
}
|
||||
|
||||
if (emitReferenceConfidence) {
|
||||
mergedVC = addNonRefSymbolicAllele(mergedVC);
|
||||
readAlleleLikelihoods.addNonReferenceAllele(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
}
|
||||
|
||||
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC, noCallAlleles );
|
||||
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
|
||||
if( call != null ) {
|
||||
|
||||
readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
|
||||
genomeLocParser, emitReferenceConfidence, alleleMapper, readAlleleLikelihoods, call);
|
||||
|
||||
ReferenceContext referenceContext = new ReferenceContext(genomeLocParser, genomeLocParser.createGenomeLoc(mergedVC.getChr(), mergedVC.getStart(), mergedVC.getEnd()), refLoc, ref);
|
||||
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker,readAlleleLikelihoods, call, emitReferenceConfidence);
|
||||
|
||||
if( call.getAlleles().size() != mergedVC.getAlleles().size() )
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
|
||||
// maintain the set of all called haplotypes
|
||||
for ( final Allele calledAllele : call.getAlleles() ) {
|
||||
final List<Haplotype> haplotypeList = alleleMapper.get(calledAllele);
|
||||
if (haplotypeList == null) continue;
|
||||
calledHaplotypes.addAll(haplotypeList);
|
||||
}
|
||||
|
||||
if ( !emitReferenceConfidence ) {
|
||||
// set GTs to no-call when GQ is 0 in normal mode
|
||||
annotatedCall = clearUnconfidentGenotypeCalls(annotatedCall);
|
||||
}
|
||||
|
||||
final GenotypesContext genotypes = calculateGLsForThisEvent(readAlleleLikelihoods, noCallAlleles );
|
||||
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).alleles(readAlleleLikelihoods.alleles()).genotypes(genotypes).make(), calculationModel);
|
||||
if ( call != null ) {
|
||||
final VariantContext annotatedCall = annotateCall(readLikelihoods, perSampleFilteredReadList, ref, refLoc, genomeLocParser, tracker, emitReferenceConfidence, calledHaplotypes, mergedVC, alleleMapper, readAlleleLikelihoods, someAllelesWereDropped, call);
|
||||
returnCalls.add( annotatedCall );
|
||||
}
|
||||
}
|
||||
|
|
@ -298,6 +282,148 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
return new CalledHaplotypes(phasedCalls, calledHaplotypes);
|
||||
}
|
||||
|
||||
private VariantContext annotateCall(final ReadLikelihoods<Haplotype> readLikelihoods,
|
||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final byte[] ref, final GenomeLoc refLoc, final GenomeLocParser genomeLocParser,
|
||||
final RefMetaDataTracker tracker,
|
||||
final boolean emitReferenceConfidence,
|
||||
final Set<Haplotype> calledHaplotypes, final VariantContext mergedVC,
|
||||
final Map<Allele, List<Haplotype>> alleleMapper,
|
||||
final ReadLikelihoods<Allele> readAlleleLikelihoods,
|
||||
final boolean someAlternativeAllelesWereAlreadyDropped,
|
||||
final VariantContext call) {
|
||||
final int initialAlleleNumber = readAlleleLikelihoods.alleleCount();
|
||||
final ReadLikelihoods<Allele> readAlleleLikelihoodsForAnnotation = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
|
||||
genomeLocParser, emitReferenceConfidence, alleleMapper, readAlleleLikelihoods, call);
|
||||
|
||||
ReferenceContext referenceContext = new ReferenceContext(genomeLocParser, genomeLocParser.createGenomeLoc(mergedVC), refLoc, ref);
|
||||
final boolean someAlternativeAllelesWereDropped = call.getAlleles().size() != initialAlleleNumber;
|
||||
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(referenceContext, tracker,readAlleleLikelihoodsForAnnotation, call, emitReferenceConfidence);
|
||||
if (someAlternativeAllelesWereDropped || someAlternativeAllelesWereAlreadyDropped)
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
|
||||
// maintain the set of all called haplotypes
|
||||
for ( final Allele calledAllele : call.getAlleles() ) {
|
||||
final List<Haplotype> haplotypeList = alleleMapper.get(calledAllele);
|
||||
if (haplotypeList == null) continue;
|
||||
calledHaplotypes.addAll(haplotypeList);
|
||||
}
|
||||
|
||||
return !emitReferenceConfidence ? clearUnconfidentGenotypeCalls(annotatedCall) : annotatedCall;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduce the number alternative alleles in a read-likelihoods collection to the maximum-alt-allele user parameter value.
|
||||
* <p>
|
||||
* We always keep the reference allele.
|
||||
* As for the other alleles we keep the ones with the highest AF estimated as
|
||||
* described in {@link #excessAlternativeAlleles(GenotypingLikelihoods, int)}
|
||||
* </p>
|
||||
* @param readAlleleLikelihoods the target read-likelihood collection.
|
||||
*/
|
||||
private void reduceNumberOfAlternativeAllelesBasedOnLikelihoods(final ReadLikelihoods<Allele> readAlleleLikelihoods, final GenomeLoc location) {
|
||||
final GenotypingLikelihoods<Allele> genotypeLikelihoods = genotypingModel.calculateLikelihoods(readAlleleLikelihoods, new GenotypingData<>(ploidyModel,readAlleleLikelihoods));
|
||||
final Set<Allele> allelesToDrop = excessAlternativeAlleles(genotypeLikelihoods, configuration.genotypeArgs.MAX_ALTERNATE_ALLELES);
|
||||
final String allelesToDropString;
|
||||
if (allelesToDrop.size() < MAX_DROPPED_ALTERNATIVE_ALLELES_TO_LOG) {
|
||||
allelesToDropString = StringUtil.join(", ", allelesToDrop);
|
||||
} else {
|
||||
final Iterator<Allele> it = allelesToDrop.iterator();
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
for (int i = 0; i < MAX_DROPPED_ALTERNATIVE_ALLELES_TO_LOG; i++) {
|
||||
builder.append(it.next().toString()).append(", ");
|
||||
}
|
||||
allelesToDropString = builder.append(it.next().toString()).append(" and ").append(allelesToDrop.size() - 20).append(" more").toString();
|
||||
}
|
||||
logger.warn(String.format("location %s: too many alternative alleles found (%d) larger than the maximum requested with -%s (%d), the following will be dropped: %s.", location,
|
||||
readAlleleLikelihoods.alleleCount() - 1, GenotypeCalculationArgumentCollection.MAX_ALTERNATE_ALLELES_SHORT_NAME, configuration.genotypeArgs.MAX_ALTERNATE_ALLELES,
|
||||
allelesToDropString));
|
||||
readAlleleLikelihoods.dropAlleles(allelesToDrop);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the set of alleles that should be dropped in order to bring down the number
|
||||
* of alternative alleles to the maximum allowed.
|
||||
*
|
||||
* <p>
|
||||
* The alleles that put forward for removal are those with the lowest estimated allele count.
|
||||
* </p>
|
||||
* <p>
|
||||
* Allele counts are estimated herein as the weighted average count
|
||||
* across samples and phased genotypes where the weight is the genotype likelihood-- we apply
|
||||
* a uniform prior to all genotypes configurations.
|
||||
* </p>
|
||||
* <p>
|
||||
* In case of a tie, unlikely for non trivial likelihoods, we keep the alleles with the lower index.
|
||||
* </p>
|
||||
*
|
||||
* @param genotypeLikelihoods target genotype likelihoods.
|
||||
* @param maxAlternativeAlleles maximum number of alternative alleles allowed.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
private Set<Allele> excessAlternativeAlleles(final GenotypingLikelihoods<Allele> genotypeLikelihoods, final int maxAlternativeAlleles) {
|
||||
final int alleleCount = genotypeLikelihoods.alleleCount();
|
||||
final int excessAlternativeAlleleCount = Math.max(0, alleleCount - 1 - maxAlternativeAlleles);
|
||||
if (excessAlternativeAlleleCount <= 0) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
final double log10NumberOfAlleles = MathUtils.Log10Cache.get(alleleCount); // log10(Num of Alleles); e.g. log10(2) for diploids.
|
||||
final double[] log10EstimatedACs = new double[alleleCount]; // where we store the AC estimates.
|
||||
// Set allele counts to 0 (i.e. exp(-Inf)) at the start.
|
||||
Arrays.fill(log10EstimatedACs, Double.NEGATIVE_INFINITY);
|
||||
|
||||
for (int i = 0; i < genotypeLikelihoods.sampleCount(); i++) {
|
||||
final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(genotypeLikelihoods.samplePloidy(i), alleleCount);
|
||||
final int numberOfUnphasedGenotypes = calculator.genotypeCount();
|
||||
// unphased genotype log10 likelihoods
|
||||
final double[] log10Likelihoods = genotypeLikelihoods.sampleLikelihoods(i).getAsVector();
|
||||
// total number of phased genotypes for all possible combinations of allele counts.
|
||||
final double log10NumberOfPhasedGenotypes = calculator.ploidy() * log10NumberOfAlleles;
|
||||
for (int j = 0; j < numberOfUnphasedGenotypes; j++) {
|
||||
final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(j);
|
||||
// given the current unphased genotype, how many phased genotypes there are:
|
||||
final double log10NumberOfPhasedGenotypesForThisUnphasedGenotype = alleleCounts.log10CombinationCount();
|
||||
final double log10GenotypeLikelihood = log10Likelihoods[j];
|
||||
for (int k = 0; k < alleleCounts.distinctAlleleCount(); k++) {
|
||||
final int alleleIndex = alleleCounts.alleleIndexAt(k);
|
||||
final int alleleCallCount = alleleCounts.alleleCountAt(k);
|
||||
final double log10AlleleCount = MathUtils.Log10Cache.get(alleleCallCount);
|
||||
final double log10Weight = log10GenotypeLikelihood + log10NumberOfPhasedGenotypesForThisUnphasedGenotype
|
||||
- log10NumberOfPhasedGenotypes;
|
||||
// update the allele AC adding the contribution of this unphased genotype at this sample.
|
||||
log10EstimatedACs[alleleIndex] = MathUtils.log10sumLog10(log10EstimatedACs[alleleIndex],
|
||||
log10Weight + log10AlleleCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final PriorityQueue<Allele> lessFrequentFirst = new PriorityQueue<>(alleleCount, new Comparator<Allele>() {
|
||||
@Override
|
||||
public int compare(final Allele a1, final Allele a2) {
|
||||
final int index1 = genotypeLikelihoods.alleleIndex(a1);
|
||||
final int index2 = genotypeLikelihoods.alleleIndex(a2);
|
||||
final double freq1 = log10EstimatedACs[index1];
|
||||
final double freq2 = log10EstimatedACs[index2];
|
||||
if (freq1 != freq2) {
|
||||
return Double.compare(freq1, freq2);
|
||||
} else {
|
||||
return Integer.compare(index2, index1);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
for (int i = 1; i < alleleCount; i++) {
|
||||
lessFrequentFirst.add(genotypeLikelihoods.alleleAt(i));
|
||||
}
|
||||
|
||||
final Set<Allele> result = new HashSet<>(excessAlternativeAlleleCount);
|
||||
for (int i = 0; i < excessAlternativeAlleleCount; i++) {
|
||||
result.add(lessFrequentFirst.remove());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to phase the individual alleles based on pairwise comparisons to the other alleles based on all called haplotypes
|
||||
*
|
||||
|
|
@ -325,13 +451,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
* @param calledHaplotypes the set of haplotypes used for calling
|
||||
* @return non-null Map
|
||||
*/
|
||||
protected static Map<VariantContext, Set<Haplotype>> constructHaplotypeMapping(final List<VariantContext> originalCalls,
|
||||
@VisibleForTesting
|
||||
static Map<VariantContext, Set<Haplotype>> constructHaplotypeMapping(final List<VariantContext> originalCalls,
|
||||
final Set<Haplotype> calledHaplotypes) {
|
||||
final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>(originalCalls.size());
|
||||
for ( final VariantContext call : originalCalls ) {
|
||||
// don't try to phase if there is not exactly 1 alternate allele
|
||||
if ( ! isBiallelic(call) ) {
|
||||
haplotypeMap.put(call, Collections.<Haplotype>emptySet());
|
||||
haplotypeMap.put(call, Collections.emptySet());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -362,10 +489,11 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
* note that it is okay for this method NOT to populate the phaseSetMapping at all (e.g. in an impossible-to-phase situation)
|
||||
* @return the next incremental unique index
|
||||
*/
|
||||
protected static int constructPhaseSetMapping(final List<VariantContext> originalCalls,
|
||||
final Map<VariantContext, Set<Haplotype>> haplotypeMap,
|
||||
final int totalAvailableHaplotypes,
|
||||
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping) {
|
||||
@VisibleForTesting
|
||||
static int constructPhaseSetMapping(final List<VariantContext> originalCalls,
|
||||
final Map<VariantContext, Set<Haplotype>> haplotypeMap,
|
||||
final int totalAvailableHaplotypes,
|
||||
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping) {
|
||||
|
||||
final int numCalls = originalCalls.size();
|
||||
int uniqueCounter = 0;
|
||||
|
|
@ -457,9 +585,10 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
* @param indexTo last index (exclusive) of phase group IDs
|
||||
* @return a non-null list which represents the possibly phased version of the calls
|
||||
*/
|
||||
protected static List<VariantContext> constructPhaseGroups(final List<VariantContext> originalCalls,
|
||||
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping,
|
||||
final int indexTo) {
|
||||
@VisibleForTesting
|
||||
static List<VariantContext> constructPhaseGroups(final List<VariantContext> originalCalls,
|
||||
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping,
|
||||
final int indexTo) {
|
||||
final List<VariantContext> phasedCalls = new ArrayList<>(originalCalls);
|
||||
|
||||
// if we managed to find any phased groups, update the VariantContexts
|
||||
|
|
@ -561,6 +690,10 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
}
|
||||
|
||||
if (call.getAlleles().size() != readAlleleLikelihoodsForAnnotations.alleleCount()) {
|
||||
readAlleleLikelihoodsForAnnotations.updateNonRefAlleleLikelihoods(new IndexedAlleleList<>(new HashSet<>(call.getAlleles())));
|
||||
}
|
||||
|
||||
// Skim the filtered map based on the location so that we do not add filtered read that are going to be removed
|
||||
// right after a few lines of code bellow.
|
||||
final Map<String, List<GATKSAMRecord>> overlappingFilteredReads = overlappingFilteredReads(perSampleFilteredReadList, loc);
|
||||
|
|
@ -687,15 +820,12 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
/**
|
||||
* For a particular event described in inputVC, form PL vector for each sample by looking into allele read map and filling likelihood matrix for each allele
|
||||
* @param readLikelihoods Allele map describing mapping from reads to alleles and corresponding likelihoods
|
||||
* @param mergedVC Input VC with event to genotype
|
||||
* @return GenotypesContext object wrapping genotype objects with PLs
|
||||
*/
|
||||
@Requires({"readLikelihoods!= null", "mergedVC != null"})
|
||||
@Ensures("result != null")
|
||||
protected GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC, final List<Allele> noCallAlleles ) {
|
||||
final List<Allele> vcAlleles = mergedVC.getAlleles();
|
||||
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
|
||||
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
|
||||
private GenotypesContext calculateGLsForThisEvent(final ReadLikelihoods<Allele> readLikelihoods, final List<Allele> noCallAlleles) {
|
||||
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(readLikelihoods, new GenotypingData<>(ploidyModel, readLikelihoods));
|
||||
final int sampleCount = samples.sampleCount();
|
||||
final GenotypesContext result = GenotypesContext.create(sampleCount);
|
||||
for (int s = 0; s < sampleCount; s++)
|
||||
|
|
@ -709,7 +839,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
*/
|
||||
// TODO - split into input haplotypes and output haplotypes as not to share I/O arguments
|
||||
@Requires("haplotypes != null")
|
||||
protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
|
||||
private static void cleanUpSymbolicUnassembledEvents(final List<Haplotype> haplotypes) {
|
||||
final List<Haplotype> haplotypesToRemove = new ArrayList<>();
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
for( final VariantContext vc : h.getEventMap().getVariantContexts() ) {
|
||||
|
|
@ -742,9 +872,9 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
|
||||
final Map<Event, List<Haplotype>> eventMapper = new LinkedHashMap<>(eventsAtThisLoc.size()+1);
|
||||
final Event refEvent = new Event(null);
|
||||
eventMapper.put(refEvent, new ArrayList<Haplotype>());
|
||||
eventMapper.put(refEvent, new ArrayList<>());
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
eventMapper.put(new Event(vc), new ArrayList<Haplotype>());
|
||||
eventMapper.put(new Event(vc), new ArrayList<>());
|
||||
}
|
||||
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
|
|
@ -764,11 +894,12 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
}
|
||||
|
||||
@Deprecated
|
||||
protected static Map<Integer,VariantContext> generateVCsFromAlignment( final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd ) {
|
||||
@VisibleForTesting
|
||||
static Map<Integer,VariantContext> generateVCsFromAlignment(final Haplotype haplotype, final byte[] ref, final GenomeLoc refLoc, final String sourceNameToAdd) {
|
||||
return new EventMap(haplotype, ref, refLoc, sourceNameToAdd);
|
||||
}
|
||||
|
||||
protected static boolean containsVCWithMatchingAlleles( final List<VariantContext> list, final VariantContext vcToTest ) {
|
||||
private static boolean containsVCWithMatchingAlleles( final List<VariantContext> list, final VariantContext vcToTest ) {
|
||||
for( final VariantContext vc : list ) {
|
||||
if( vc.hasSameAllelesAs(vcToTest) ) {
|
||||
return true;
|
||||
|
|
@ -780,7 +911,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
protected static class Event {
|
||||
public VariantContext vc;
|
||||
|
||||
public Event( final VariantContext vc ) {
|
||||
@VisibleForTesting
|
||||
Event( final VariantContext vc ) {
|
||||
this.vc = vc;
|
||||
}
|
||||
|
||||
|
|
@ -800,7 +932,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
*
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public PloidyModel getPloidyModel() {
|
||||
PloidyModel getPloidyModel() {
|
||||
return ploidyModel;
|
||||
}
|
||||
|
||||
|
|
@ -809,7 +941,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
|
|||
*
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public GenotypingModel getGenotypingModel() {
|
||||
GenotypingModel getGenotypingModel() {
|
||||
return genotypingModel;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -352,7 +352,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
public void testHaplotypeCallerMultiAllelicNonRef() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A StrandAlleleCountsBySample",
|
||||
b37KGReference, privateTestDir + "multiallelic-nonref.bam", "2:47641259-47641859", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("87f9203f5ac637080469052b702c61c7"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("1d9e75bd09a6fc5a1d9156fe8a7d43ce"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -174,7 +174,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testHaplotypeCallerMultiSampleGGA() throws IOException {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf" +
|
||||
" -isr INTERSECTION -L " + GGA_INTERVALS_FILE,
|
||||
"6d3cea3ee76b6eba14c1dfe230cff96b");
|
||||
"af3f54bee3347cafb34734dc7d3516a9");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -186,7 +186,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGATetraploid() throws IOException {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -isr INTERSECTION -L 20:10080000-10100000",
|
||||
"9315d146a66c7baf3b615eb480c54dc1");
|
||||
"d7c3aac002701bec6b6d0539c65207be");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -315,11 +315,11 @@ public class MathUtils {
|
|||
return maxValue + (sum != 1.0 ? Math.log10(sum) : 0.0);
|
||||
}
|
||||
|
||||
public static double sumLog10(final double[] log10values) {
|
||||
public static double sumLog10(final double ... log10values) {
|
||||
return Math.pow(10.0, log10sumLog10(log10values));
|
||||
}
|
||||
|
||||
public static double log10sumLog10(final double[] log10values) {
|
||||
public static double log10sumLog10(final double ... log10values) {
|
||||
return log10sumLog10(log10values, 0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.utils.genotyper;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import it.unimi.dsi.fastutil.objects.Object2IntMap;
|
||||
|
|
@ -34,6 +35,7 @@ import org.broadinstitute.gatk.utils.GenomeLoc;
|
|||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -92,6 +94,11 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
*/
|
||||
private int referenceAlleleIndex = -1;
|
||||
|
||||
/**
|
||||
* Index of the non-ref allele if any, otherwise - 1.
|
||||
*/
|
||||
private int nonRefAlleleIndex = -1;
|
||||
|
||||
/**
|
||||
* Caches the read-list per sample list returned by {@link #sampleReads}
|
||||
*/
|
||||
|
|
@ -138,6 +145,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
readListBySampleIndex = new List[sampleCount];
|
||||
valuesBySampleIndex = new double[sampleCount][][];
|
||||
referenceAlleleIndex = findReferenceAllele(alleles);
|
||||
nonRefAlleleIndex = findNonRefAllele(alleles);
|
||||
|
||||
readIndexBySampleIndex = new Object2IntMap[sampleCount];
|
||||
|
||||
|
|
@ -219,6 +227,15 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
return -1;
|
||||
}
|
||||
|
||||
private int findNonRefAllele(final AlleleList<A> alleles) {
|
||||
final int alleleCount = alleles.alleleCount();
|
||||
for (int i = alleleCount - 1; i >= 0; i--) {
|
||||
if (alleles.alleleAt(i).equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE))
|
||||
return i;
|
||||
}
|
||||
return - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the index of a sample within the likelihood collection.
|
||||
*
|
||||
|
|
@ -419,16 +436,21 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Search the best allele for a read.
|
||||
*
|
||||
* @param sampleIndex including sample index.
|
||||
* @param readIndex target read index.
|
||||
*
|
||||
* @return never {@code null}, but with {@link BestAllele#allele allele} == {@code null}
|
||||
* if non-could be found.
|
||||
*/
|
||||
private BestAllele searchBestAllele(final int sampleIndex, final int readIndex, final boolean canBeReference) {
|
||||
return searchBestAllele(sampleIndex, readIndex, canBeReference, alleles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Search the best allele for a read.
|
||||
*
|
||||
* @param sampleIndex including sample index.
|
||||
* @param readIndex target read index.
|
||||
*
|
||||
* @return never {@code null}, but with {@link BestAllele#allele allele} == {@code null}
|
||||
* if non-could be found.
|
||||
*/
|
||||
private BestAllele searchBestAllele(final int sampleIndex, final int readIndex, final boolean canBeReference,
|
||||
final AlleleList<A> allelesToConsider) {
|
||||
final int alleleCount = alleles.alleleCount();
|
||||
if (alleleCount == 0 || (alleleCount == 1 && referenceAlleleIndex == 0 && !canBeReference))
|
||||
return new BestAllele(sampleIndex,readIndex,-1,Double.NEGATIVE_INFINITY,Double.NEGATIVE_INFINITY);
|
||||
|
|
@ -441,6 +463,10 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
for (int a = bestAlleleIndex + 1; a < alleleCount; a++) {
|
||||
if (!canBeReference && referenceAlleleIndex == a)
|
||||
continue;
|
||||
if (nonRefAlleleIndex == a)
|
||||
continue;
|
||||
if (allelesToConsider.alleleIndex(alleles.alleleAt(a)) < 0)
|
||||
continue;
|
||||
final double candidateLikelihood = sampleValues[a][readIndex];
|
||||
if (candidateLikelihood > bestLikelihood) {
|
||||
bestAlleleIndex = a;
|
||||
|
|
@ -501,6 +527,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
|
||||
alleleList = null;
|
||||
int referenceIndex = this.referenceAlleleIndex;
|
||||
int nonRefIndex = this.nonRefAlleleIndex;
|
||||
@SuppressWarnings("unchecked")
|
||||
final A[] newAlleles = (A[]) new Allele[newAlleleCount];
|
||||
for (int a = 0; a < oldAlleleCount; a++)
|
||||
|
|
@ -511,19 +538,20 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
if (referenceIndex != -1)
|
||||
throw new IllegalArgumentException("there cannot be more than one reference allele");
|
||||
referenceIndex = newIndex;
|
||||
} else if (allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE)) {
|
||||
if (nonRefAlleleIndex != -1)
|
||||
throw new IllegalArgumentException(String.format("there cannot be more than one %s allele", GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE_NAME));
|
||||
nonRefIndex = newIndex;
|
||||
}
|
||||
newAlleles[newIndex++] = allele;
|
||||
}
|
||||
|
||||
alleles = new IndexedAlleleList<>(newAlleles);
|
||||
|
||||
if (referenceIndex != -1)
|
||||
referenceAlleleIndex = referenceIndex;
|
||||
|
||||
final int sampleCount = samples.sampleCount();
|
||||
for (int s = 0; s < sampleCount; s++) {
|
||||
final int sampleReadCount = readsBySampleIndex[s].length;
|
||||
final double[][] newValuesBySampleIndex = Arrays.copyOf(valuesBySampleIndex[s],newAlleleCount);
|
||||
final double[][] newValuesBySampleIndex = Arrays.copyOf(valuesBySampleIndex[s], newAlleleCount);
|
||||
for (int a = oldAlleleCount; a < newAlleleCount; a++) {
|
||||
newValuesBySampleIndex[a] = new double[sampleReadCount];
|
||||
if (defaultLikelihood != 0.0)
|
||||
|
|
@ -531,8 +559,86 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
}
|
||||
valuesBySampleIndex[s] = newValuesBySampleIndex;
|
||||
}
|
||||
|
||||
if (referenceIndex != -1)
|
||||
referenceAlleleIndex = referenceIndex;
|
||||
if (nonRefIndex != -1) {
|
||||
nonRefAlleleIndex = nonRefIndex;
|
||||
updateNonRefAlleleLikelihoods();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Modify this likelihood collection dropping some of its alleles.
|
||||
* @param allelesToDrop set of alleles to be dropped.
|
||||
* @throws IllegalArgumentException if {@code allelesToDrop} is {@code null} or contain elements that are
|
||||
* not alleles in this collection.
|
||||
*/
|
||||
public void dropAlleles(final Set<A> allelesToDrop) {
|
||||
if (allelesToDrop == null) {
|
||||
throw new IllegalArgumentException("the input allele to drop set cannot be null");
|
||||
}
|
||||
if (allelesToDrop.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
final boolean[] indicesToDrop = new boolean[alleles.alleleCount()];
|
||||
for (final A allele : allelesToDrop) {
|
||||
final int index = alleles.alleleIndex(allele);
|
||||
if (index < 0) {
|
||||
throw new IllegalArgumentException("unknown allele: " + allele);
|
||||
}
|
||||
indicesToDrop[index] = true;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
final A[] newAlleles = (A[]) new Allele[alleles.alleleCount() - allelesToDrop.size()];
|
||||
final int[] newAlleleIndices = new int[newAlleles.length];
|
||||
int nextIndex = 0;
|
||||
for (int i = 0; i < alleles.alleleCount(); i++) {
|
||||
if (indicesToDrop[i]) {
|
||||
continue;
|
||||
}
|
||||
newAlleleIndices[nextIndex] = i;
|
||||
newAlleles[nextIndex++] = alleles.alleleAt(i);
|
||||
}
|
||||
for (int i = 0; i < samples.sampleCount(); i++) {
|
||||
final double[][] oldSampleValues = valuesBySampleIndex[i];
|
||||
final double[][] newSampleValues = new double[newAlleles.length][];
|
||||
for (int j = 0; j < newAlleles.length; j++) {
|
||||
newSampleValues[j] = oldSampleValues[newAlleleIndices[j]];
|
||||
}
|
||||
valuesBySampleIndex[i] = newSampleValues;
|
||||
}
|
||||
alleleList = Collections.unmodifiableList(Arrays.asList(newAlleles));
|
||||
alleles = new IndexedAlleleList<>(alleleList);
|
||||
if (nonRefAlleleIndex >= 0) {
|
||||
nonRefAlleleIndex = findNonRefAllele(alleles);
|
||||
updateNonRefAlleleLikelihoods();
|
||||
}
|
||||
if (referenceAlleleIndex >= 0)
|
||||
referenceAlleleIndex = findReferenceAllele(alleles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Drop all the alleles not present in the subset passed as a parameter.
|
||||
* @param subset the alleles to retain.
|
||||
* @throws IllegalArgumentException if {@code alleles} is {@code null} or contain alleles unknown to this likelihood
|
||||
* collection.
|
||||
*/
|
||||
public void retainAlleles(final Set<A> subset) {
|
||||
if (alleles == null) {
|
||||
throw new IllegalArgumentException("the retain subset must not be null");
|
||||
} else if (!alleles().containsAll(subset) || subset.size() > alleleCount()) {
|
||||
throw new IllegalArgumentException("some of the alleles to retain are not present in the read-likelihoods collection");
|
||||
} else if (subset.isEmpty() || (subset.size() == 1 && subset.contains(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE))) {
|
||||
throw new IllegalArgumentException("there must be at least one allele to retain");
|
||||
} else {
|
||||
final Set<A> allelesToDrop = new HashSet<>(alleles());
|
||||
allelesToDrop.removeAll(subset);
|
||||
dropAlleles(allelesToDrop);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Likelihood matrix between a set of alleles and reads.
|
||||
* @param <A> the allele-type.
|
||||
|
|
@ -543,13 +649,13 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* List of reads in the matrix sorted by their index therein.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public List<GATKSAMRecord> reads();
|
||||
List<GATKSAMRecord> reads();
|
||||
|
||||
/**
|
||||
* List of alleles in the matrix sorted by their index in the collection.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public List<A> alleles();
|
||||
List<A> alleles();
|
||||
|
||||
/**
|
||||
* Set the likelihood of a read given an allele through their indices.
|
||||
|
|
@ -561,7 +667,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @throws IllegalArgumentException if {@code alleleIndex} or {@code readIndex}
|
||||
* are not valid allele and read indices respectively.
|
||||
*/
|
||||
public void set(final int alleleIndex, final int readIndex, final double value);
|
||||
void set(final int alleleIndex, final int readIndex, final double value);
|
||||
|
||||
/**
|
||||
* Returns the likelihood of a read given a haplotype.
|
||||
|
|
@ -575,7 +681,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @return the requested likelihood, whatever value was provided using {@link #set(int,int,double) set}
|
||||
* or 0.0 if none was set.
|
||||
*/
|
||||
public double get(final int alleleIndex, final int readIndex);
|
||||
double get(final int alleleIndex, final int readIndex);
|
||||
|
||||
/**
|
||||
* Queries the index of an allele in the matrix.
|
||||
|
|
@ -586,7 +692,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @return -1 if such allele does not exist, otherwise its index which 0 or greater.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public int alleleIndex(final A allele);
|
||||
int alleleIndex(final A allele);
|
||||
|
||||
/**
|
||||
* Queries the index of a read in the matrix.
|
||||
|
|
@ -599,19 +705,19 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* which is 0 or greater.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public int readIndex(final GATKSAMRecord read);
|
||||
int readIndex(final GATKSAMRecord read);
|
||||
|
||||
/**
|
||||
* Number of allele in the matrix.
|
||||
* @return never negative.
|
||||
*/
|
||||
public int alleleCount();
|
||||
int alleleCount();
|
||||
|
||||
/**
|
||||
* Number of reads in the matrix.
|
||||
* @return never negative.
|
||||
*/
|
||||
public int readCount();
|
||||
int readCount();
|
||||
|
||||
/**
|
||||
* Returns the allele given its index.
|
||||
|
|
@ -621,7 +727,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @throws IllegalArgumentException if {@code alleleIndex} is not a valid allele index.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public A alleleAt(final int alleleIndex);
|
||||
A alleleAt(final int alleleIndex);
|
||||
|
||||
/**
|
||||
* Returns the allele given its index.
|
||||
|
|
@ -631,7 +737,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @throws IllegalArgumentException if {@code readIndex} is not a valid read index.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public GATKSAMRecord readAt(final int readIndex);
|
||||
GATKSAMRecord readAt(final int readIndex);
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -640,7 +746,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @param dest the destination array.
|
||||
* @param offset the copy offset within the destination allele
|
||||
*/
|
||||
public void copyAlleleLikelihoods(final int alleleIndex, final double[] dest, final int offset);
|
||||
void copyAlleleLikelihoods(final int alleleIndex, final double[] dest, final int offset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -905,7 +1011,6 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
*
|
||||
* @throws IllegalArgumentException the location cannot be {@code null} nor unmapped.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public void filterToOnlyOverlappingUnclippedReads(final GenomeLoc location) {
|
||||
if (location == null)
|
||||
throw new IllegalArgumentException("the location cannot be null");
|
||||
|
|
@ -921,7 +1026,6 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
final int alleleCount = alleles.alleleCount();
|
||||
final IntArrayList removeIndices = new IntArrayList(10);
|
||||
for (int s = 0; s < sampleCount; s++) {
|
||||
int readRemoveCount = 0;
|
||||
final GATKSAMRecord[] sampleReads = readsBySampleIndex[s];
|
||||
final int sampleReadCount = sampleReads.length;
|
||||
for (int r = 0; r < sampleReadCount; r++)
|
||||
|
|
@ -932,28 +1036,6 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
}
|
||||
}
|
||||
|
||||
// Compare the read coordinates to the location of interest.
|
||||
private boolean readOverlapsLocation(final String contig, final int locStart,
|
||||
final int locEnd, final GATKSAMRecord read) {
|
||||
final boolean overlaps;
|
||||
|
||||
if (read.getReadUnmappedFlag())
|
||||
overlaps = false;
|
||||
else if (!read.getReferenceName().equals(contig))
|
||||
overlaps = false;
|
||||
else {
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnStop = read.getAlignmentEnd();
|
||||
if (alnStart > alnStop) { // Paranoia? based on GLP.createGenomeLoc(Read) this can happen?.
|
||||
final int end = alnStart;
|
||||
alnStart = alnStop;
|
||||
alnStop = end;
|
||||
}
|
||||
overlaps = !(alnStop < locStart || alnStart > locEnd);
|
||||
}
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes those read that the best possible likelihood given any allele is just too low.
|
||||
*
|
||||
|
|
@ -991,7 +1073,7 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
}
|
||||
|
||||
// Check whether the read is poorly modelled.
|
||||
protected boolean readIsPoorlyModelled(final int sampleIndex, final int readIndex, final GATKSAMRecord read, final double maxErrorRatePerBase) {
|
||||
private boolean readIsPoorlyModelled(final int sampleIndex, final int readIndex, final GATKSAMRecord read, final double maxErrorRatePerBase) {
|
||||
final double maxErrorsForRead = Math.min(2.0, Math.ceil(read.getReadLength() * maxErrorRatePerBase));
|
||||
final double log10QualPerBase = -4.0;
|
||||
final double log10MaxLikelihoodForTrueAllele = maxErrorsForRead * log10QualPerBase;
|
||||
|
|
@ -1089,37 +1171,25 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
throw new IllegalArgumentException("non-ref allele cannot be null");
|
||||
if (!nonRefAllele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE))
|
||||
throw new IllegalArgumentException("the non-ref allele is not valid");
|
||||
// Already present?
|
||||
if (alleles.alleleIndex(nonRefAllele) != -1)
|
||||
return;
|
||||
|
||||
final int oldAlleleCount = alleles.alleleCount();
|
||||
final int newAlleleCount = oldAlleleCount + 1;
|
||||
@SuppressWarnings("unchecked")
|
||||
final A[] newAlleles = (A[]) new Allele[newAlleleCount];
|
||||
for (int a = 0; a < oldAlleleCount; a++)
|
||||
newAlleles[a] = alleles.alleleAt(a);
|
||||
newAlleles[oldAlleleCount] = nonRefAllele;
|
||||
alleles = new IndexedAlleleList<>(newAlleles);
|
||||
alleleList = null; // remove the cached alleleList.
|
||||
|
||||
final int sampleCount = samples.sampleCount();
|
||||
for (int s = 0; s < sampleCount; s++)
|
||||
addNonReferenceAlleleLikelihoodsPerSample(oldAlleleCount, newAlleleCount, s);
|
||||
addMissingAlleles(Collections.singleton(nonRefAllele), Double.NEGATIVE_INFINITY);
|
||||
updateNonRefAlleleLikelihoods();
|
||||
}
|
||||
|
||||
// Updates per-sample structures according to the addition of the NON_REF allele.
|
||||
private void addNonReferenceAlleleLikelihoodsPerSample(final int alleleCount, final int newAlleleCount, final int sampleIndex) {
|
||||
final double[][] sampleValues = valuesBySampleIndex[sampleIndex] = Arrays.copyOf(valuesBySampleIndex[sampleIndex], newAlleleCount);
|
||||
final int sampleReadCount = readsBySampleIndex[sampleIndex].length;
|
||||
public void updateNonRefAlleleLikelihoods() {
|
||||
updateNonRefAlleleLikelihoods(alleles);
|
||||
}
|
||||
|
||||
final double[] nonRefAlleleLikelihoods = sampleValues[alleleCount] = new double [sampleReadCount];
|
||||
Arrays.fill(nonRefAlleleLikelihoods,Double.NEGATIVE_INFINITY);
|
||||
for (int r = 0; r < sampleReadCount; r++) {
|
||||
final BestAllele bestAllele = searchBestAllele(sampleIndex,r,true);
|
||||
final double secondBestLikelihood = Double.isInfinite(bestAllele.confidence) ? bestAllele.likelihood
|
||||
: bestAllele.likelihood - bestAllele.confidence;
|
||||
nonRefAlleleLikelihoods[r] = secondBestLikelihood;
|
||||
public void updateNonRefAlleleLikelihoods(final AlleleList<A> allelesToConsider) {
|
||||
if (nonRefAlleleIndex < 0)
|
||||
return;
|
||||
for (int s = 0; s < samples.sampleCount(); s++) {
|
||||
final double[][] sampleValues = valuesBySampleIndex[s];
|
||||
for (int r = 0; r < sampleValues[0].length; r++) {
|
||||
final BestAllele bestAllele = searchBestAllele(s, r, true, allelesToConsider);
|
||||
final double secondBestLikelihood = Double.isInfinite(bestAllele.confidence) ? bestAllele.likelihood
|
||||
: bestAllele.likelihood - bestAllele.confidence;
|
||||
sampleValues[nonRefAlleleIndex][r] = secondBestLikelihood;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1170,9 +1240,9 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
*
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public static ReadLikelihoods<Allele> fromPerAlleleReadLikelihoodsMap(final AlleleList<Allele> alleleList, final Map<String,PerReadAlleleLikelihoodMap> map) {
|
||||
@VisibleForTesting
|
||||
static ReadLikelihoods<Allele> fromPerAlleleReadLikelihoodsMap(final AlleleList<Allele> alleleList, final Map<String,PerReadAlleleLikelihoodMap> map) {
|
||||
|
||||
//TODO add test code for this method.
|
||||
// First we need to create the read-likelihood collection with all required alleles, samples and reads.
|
||||
final SampleList sampleList = new IndexedSampleList(map.keySet());
|
||||
final int alleleCount = alleleList.alleleCount();
|
||||
|
|
@ -1228,7 +1298,8 @@ public class ReadLikelihoods<A extends Allele> implements SampleList, AlleleList
|
|||
* @param sampleIndex the target sample.
|
||||
* @return never {@code null}, perhaps empty.
|
||||
*/
|
||||
public Map<A,List<GATKSAMRecord>> readsByBestAlleleMap(final int sampleIndex) {
|
||||
@VisibleForTesting
|
||||
Map<A,List<GATKSAMRecord>> readsByBestAlleleMap(final int sampleIndex) {
|
||||
checkSampleIndex(sampleIndex);
|
||||
final int alleleCount = alleles.alleleCount();
|
||||
final int sampleReadCount = readsBySampleIndex[sampleIndex].length;
|
||||
|
|
|
|||
Loading…
Reference in New Issue