Merge pull request #1290 from broadinstitute/rhl_sac_nonref

StrandAlleleCountsBySample selects most likely only from VCF alleles
This commit is contained in:
Ron Levine 2016-02-29 17:05:57 -05:00
commit 625941dc50
4 changed files with 22 additions and 30 deletions

View File

@ -139,7 +139,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
if ( ! perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
if ( ! perReadAlleleLikelihoodMap.getAllelesSet().isEmpty() && ! perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + perReadAlleleLikelihoodMap.getAllelesSet());
final HashMap<Allele, Integer> alleleCounts = new HashMap<>();
@ -148,7 +148,6 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
for ( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
if (! a.isInformative() ) continue; // read is non-informative
final GATKSAMRecord read = el.getKey();
final int prevCount = alleleCounts.get(a.getMostLikelyAllele());
alleleCounts.put(a.getMostLikelyAllele(), prevCount + 1);
}

View File

@ -69,9 +69,7 @@ import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* Number of forward and reverse reads that support each allele
@ -142,11 +140,16 @@ public class StrandAlleleCountsBySample extends GenotypeAnnotation {
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
final int[] table = new int[vc.getNAlleles()*2];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
if ( ! maps.getAllelesSet().isEmpty() && ! maps.getAllelesSet().containsAll(alleles) )
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + maps.getAllelesSet());
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
final GATKSAMRecord read = el.getKey();
if (mostLikelyAllele.isInformative())
updateTable(table, vc.getAlleleIndex(mostLikelyAllele.getAlleleIfInformative()), read);

View File

@ -423,36 +423,17 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
}
@Test
public void testStrandAlleleCountsBySample() throws IOException {
public void testStrandAlleleCountsBySample() {
final String MD5 = "93e424f866f03ac23f9ddac94401e348";
final WalkerTestSpec spec = new WalkerTestSpec(
"-T HaplotypeCaller --disableDithering " +
String.format("-R %s -I %s ", REF, CEUTRIO_BAM) +
"--no_cmdline_in_header -o %s -L 20:10130000-10134800 " +
"-A StrandBiasBySample -A StrandAlleleCountsBySample",
1, Arrays.asList("")
1, Arrays.asList(MD5)
);
spec.disableShadowBCF(); //TODO: Remove when BaseTest.assertAttributesEquals() works with SC
final File outputVCF = executeTest("testStrandAlleleCountsBySample", spec).getFirst().get(0);
//Confirm that SB and SAC are identical for bi-allelic variants
final VCFCodec codec = new VCFCodec();
final FileInputStream s = new FileInputStream(outputVCF);
final LineIterator lineIterator = codec.makeSourceFromStream(new PositionalBufferedStream(s));
codec.readHeader(lineIterator);
while (lineIterator.hasNext()) {
final String line = lineIterator.next();
Assert.assertFalse(line == null);
final VariantContext vc = codec.decode(line);
if (vc.isBiallelic()) {
for (final Genotype g : vc.getGenotypes()) {
Assert.assertTrue(g.hasExtendedAttribute("SB"));
Assert.assertTrue(g.hasExtendedAttribute("SAC"));
Assert.assertEquals(g.getExtendedAttribute("SB").toString(), g.getExtendedAttribute("SAC").toString());
}
}
}
spec.disableShadowBCF();
executeTest("testStrandAlleleCountsBySample", spec);
}
@Test(enabled = false)

View File

@ -343,4 +343,13 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
executeTest(" testASInsertSizeRankSum", spec);
}
@Test
public void testHaplotypeCallerMultiAllelicNonRef() {
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -A StrandAlleleCountsBySample",
b37KGReference, privateTestDir + "multiallelic-nonref.bam", "2:47641259-47641859", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ba2df96afbf5fe2a59270b6e65c3fb4e"));
spec.disableShadowBCF();
executeTest(" testHaplotypeCallerMultiAllelicNonRef", spec);
}
}