Fix 1448 to make alt allele removal by likelihoods robust to ref allele indices (#1475)

* alt alle removal by likelihoods now robust to ref allele indices (no longer assumes 0-indexed ref)
This commit is contained in:
Steve Huang 2016-09-13 21:20:44 -04:00 committed by GitHub
parent f51d20c517
commit 091d05370b
2 changed files with 41 additions and 3 deletions

View File

@ -487,7 +487,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
* @param maxAlternativeAlleles maximum number of alternative alleles allowed.
* @return never {@code null}.
*/
private Set<Allele> excessAlternativeAlleles(final GenotypingLikelihoods<Allele> genotypeLikelihoods, final int maxAlternativeAlleles) {
@VisibleForTesting
static Set<Allele> excessAlternativeAlleles(final GenotypingLikelihoods<Allele> genotypeLikelihoods, final int maxAlternativeAlleles) {
final int alleleCount = genotypeLikelihoods.alleleCount();
final int excessAlternativeAlleleCount = Math.max(0, alleleCount - 1 - maxAlternativeAlleles);
if (excessAlternativeAlleleCount <= 0) {
@ -539,8 +540,11 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<AssemblyBa
}
});
for (int i = 1; i < alleleCount; i++) {
lessFrequentFirst.add(genotypeLikelihoods.alleleAt(i));
for (int i = 0; i < alleleCount; i++) {
final Allele a = genotypeLikelihoods.alleleAt(i);
if(a.isNonReference()){
lessFrequentFirst.add(genotypeLikelihoods.alleleAt(i));
}
}
final Set<Allele> result = new HashSet<>(excessAlternativeAlleleCount);

View File

@ -59,10 +59,15 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList;
import org.broadinstitute.gatk.utils.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.EventMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
@ -531,4 +536,33 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
Assert.assertEquals(uniqueGroups.size(), expectedNumGroups);
Assert.assertEquals(counter, expectedGroupSize);
}
@Test
public void testExcessAlternativeAllelesKeepRef(){
// prep data
final Allele ref = Allele.create("A", true);
final Allele altC = Allele.create("C", false);
final Allele altG = Allele.create("G", false);
final Allele altT = Allele.create("T", false);
final AlleleList<Allele> indexedAlleleList = new IndexedAlleleList<>(altC, altG, altT, ref);// specifically make the ref allele not at index 0
final IndexedSampleList indexedSampleList = new IndexedSampleList("Dummy");
final List<GATKSAMRecord> reads = new ArrayList<>();
for (int i=0; i<10; ++i) {
reads.add(GATKSAMRecord.createRandomRead(101));
}
final Map<String, List<GATKSAMRecord>> sampleToReads = Collections.singletonMap(indexedSampleList.sampleAt(0), reads);
final ReadLikelihoods<Allele> readLikelihoods = new ReadLikelihoods<>(indexedSampleList, indexedAlleleList, sampleToReads);
final PloidyModel ploidyModel = new HomogeneousPloidyModel(indexedSampleList, 2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final GenotypingLikelihoods<Allele> genotypeLikelihoods = genotypingModel.calculateLikelihoods(readLikelihoods, new GenotypingData<>(ploidyModel,readLikelihoods));
// test
final Set<Allele> excessAltAlleles = HaplotypeCallerGenotypingEngine.excessAlternativeAlleles(genotypeLikelihoods, 2);
Assert.assertFalse(excessAltAlleles.contains(ref));
Assert.assertEquals(excessAltAlleles.size(), 1);
}
}