StrandAlleleCountsBySample selects most likely only from VCF alleles
This commit is contained in:
parent
3b327ac0e4
commit
80a22aad77
|
|
@ -139,7 +139,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
||||||
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
|
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
|
||||||
|
|
||||||
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
|
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
|
||||||
if ( ! perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
|
if ( ! perReadAlleleLikelihoodMap.getAllelesSet().isEmpty() && ! perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
|
||||||
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + perReadAlleleLikelihoodMap.getAllelesSet());
|
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + perReadAlleleLikelihoodMap.getAllelesSet());
|
||||||
|
|
||||||
final HashMap<Allele, Integer> alleleCounts = new HashMap<>();
|
final HashMap<Allele, Integer> alleleCounts = new HashMap<>();
|
||||||
|
|
@ -148,7 +148,6 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
||||||
for ( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
for ( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
|
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
|
||||||
if (! a.isInformative() ) continue; // read is non-informative
|
if (! a.isInformative() ) continue; // read is non-informative
|
||||||
final GATKSAMRecord read = el.getKey();
|
|
||||||
final int prevCount = alleleCounts.get(a.getMostLikelyAllele());
|
final int prevCount = alleleCounts.get(a.getMostLikelyAllele());
|
||||||
alleleCounts.put(a.getMostLikelyAllele(), prevCount + 1);
|
alleleCounts.put(a.getMostLikelyAllele(), prevCount + 1);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -69,9 +69,7 @@ import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Number of forward and reverse reads that support each allele
|
* Number of forward and reverse reads that support each allele
|
||||||
|
|
@ -142,11 +140,16 @@ public class StrandAlleleCountsBySample extends GenotypeAnnotation {
|
||||||
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
|
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
|
||||||
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
|
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
|
||||||
|
|
||||||
|
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
|
||||||
|
|
||||||
final int[] table = new int[vc.getNAlleles()*2];
|
final int[] table = new int[vc.getNAlleles()*2];
|
||||||
|
|
||||||
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||||
|
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
|
||||||
|
if ( ! maps.getAllelesSet().isEmpty() && ! maps.getAllelesSet().containsAll(alleles) )
|
||||||
|
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + maps.getAllelesSet());
|
||||||
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
||||||
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
|
||||||
final GATKSAMRecord read = el.getKey();
|
final GATKSAMRecord read = el.getKey();
|
||||||
if (mostLikelyAllele.isInformative())
|
if (mostLikelyAllele.isInformative())
|
||||||
updateTable(table, vc.getAlleleIndex(mostLikelyAllele.getAlleleIfInformative()), read);
|
updateTable(table, vc.getAlleleIndex(mostLikelyAllele.getAlleleIfInformative()), read);
|
||||||
|
|
|
||||||
|
|
@ -422,36 +422,17 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStrandAlleleCountsBySample() throws IOException {
|
public void testStrandAlleleCountsBySample() {
|
||||||
|
final String MD5 = "93e424f866f03ac23f9ddac94401e348";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(
|
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T HaplotypeCaller --disableDithering " +
|
"-T HaplotypeCaller --disableDithering " +
|
||||||
String.format("-R %s -I %s ", REF, CEUTRIO_BAM) +
|
String.format("-R %s -I %s ", REF, CEUTRIO_BAM) +
|
||||||
"--no_cmdline_in_header -o %s -L 20:10130000-10134800 " +
|
"--no_cmdline_in_header -o %s -L 20:10130000-10134800 " +
|
||||||
"-A StrandBiasBySample -A StrandAlleleCountsBySample",
|
"-A StrandBiasBySample -A StrandAlleleCountsBySample",
|
||||||
1, Arrays.asList("")
|
1, Arrays.asList(MD5)
|
||||||
);
|
);
|
||||||
spec.disableShadowBCF(); //TODO: Remove when BaseTest.assertAttributesEquals() works with SC
|
spec.disableShadowBCF();
|
||||||
final File outputVCF = executeTest("testStrandAlleleCountsBySample", spec).getFirst().get(0);
|
executeTest("testStrandAlleleCountsBySample", spec);
|
||||||
|
|
||||||
//Confirm that SB and SAC are identical for bi-allelic variants
|
|
||||||
final VCFCodec codec = new VCFCodec();
|
|
||||||
final FileInputStream s = new FileInputStream(outputVCF);
|
|
||||||
final LineIterator lineIterator = codec.makeSourceFromStream(new PositionalBufferedStream(s));
|
|
||||||
codec.readHeader(lineIterator);
|
|
||||||
|
|
||||||
while (lineIterator.hasNext()) {
|
|
||||||
final String line = lineIterator.next();
|
|
||||||
Assert.assertFalse(line == null);
|
|
||||||
final VariantContext vc = codec.decode(line);
|
|
||||||
|
|
||||||
if (vc.isBiallelic()) {
|
|
||||||
for (final Genotype g : vc.getGenotypes()) {
|
|
||||||
Assert.assertTrue(g.hasExtendedAttribute("SB"));
|
|
||||||
Assert.assertTrue(g.hasExtendedAttribute("SAC"));
|
|
||||||
Assert.assertEquals(g.getExtendedAttribute("SB").toString(), g.getExtendedAttribute("SAC").toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = false)
|
||||||
|
|
|
||||||
|
|
@ -343,4 +343,13 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||||
executeTest(" testASInsertSizeRankSum", spec);
|
executeTest(" testASInsertSizeRankSum", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaplotypeCallerMultiAllelicNonRef() {
|
||||||
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A StrandAlleleCountsBySample",
|
||||||
|
b37KGReference, privateTestDir + "multiallelic-nonref.bam", "2:47641259-47641859", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ba2df96afbf5fe2a59270b6e65c3fb4e"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue