Merge pull request #708 from broadinstitute/ldg_SBannotationWarnings

Refactor StrandBiasTest (using template method) and add warnings for whe...
This commit is contained in:
Eric Banks 2014-08-20 09:30:06 -04:00
commit 78c2da1fef
4 changed files with 146 additions and 73 deletions

View File

@ -47,6 +47,8 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import cern.jet.math.Arithmetic;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
@ -87,42 +89,34 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
private static final int MIN_COUNT = 2;
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( !vc.isVariant() )
return null;
if ( vc.hasGenotypes() ) {
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
if ( tableFromPerSampleAnnotations != null ) {
return pValueForBestTable(tableFromPerSampleAnnotations, null);
}
}
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
printTable("unfiltered", tableNoFiltering);
printTable("filtered", tableFiltering);
return pValueForBestTable(tableFiltering, tableNoFiltering);
}
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
// either SNP with no alignment context, or indels: per-read likelihood map needed
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
//logger.info("VC " + vc);
//printTable(table, 0.0);
return pValueForBestTable(table, null);
}
else
// for non-snp variants, we need per-read likelihoods.
// for snps, we can get same result from simple pileup
return null;
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
    // Sum the per-sample SB genotype annotations into a single 2x2 strand contingency table.
    final int[][] combinedTable = getTableFromSamples( genotypes, MIN_COUNT );
    if ( combinedTable == null ) {
        // No sample carried a usable SB annotation.
        return null;
    }
    return pValueForBestTable(combinedTable, null);
}
@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
                                                                        final VariantContext vc){
    // Build two contingency tables from the pileup: one unfiltered and one with the
    // base-quality filter applied, then report the more conservative p-value.
    final int[][] unfiltered = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
    final int[][] filtered = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
    printTable("unfiltered", unfiltered);
    printTable("filtered", filtered);
    return pValueForBestTable(filtered, unfiltered);
}
@Override
protected Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
                                                                   final VariantContext vc){
    // Used when no pileup is available (SNP with no alignment context) or for indels,
    // where per-read likelihoods are required to assign reads to alleles.
    final int[][] contingencyTable = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
    return pValueForBestTable(contingencyTable, null);
}
/**
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
*

View File

@ -48,7 +48,15 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
@ -64,6 +72,83 @@ import java.util.*;
* Class of tests to detect strand bias.
*/
public abstract class StrandBiasTest extends InfoFieldAnnotation {
private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
@Override
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
    // Determine up front which data source will feed the strand-bias calculation
    // (per-sample SB genotype annotations, or raw reads) and tell the user which
    // one will be attempted — or warn that neither is available.
    boolean hasSBBSannotation = false;
    for ( final VCFHeaderLine line : headerLines ) {
        if ( line instanceof VCFFormatHeaderLine ) {
            final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
            if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
                hasSBBSannotation = true;
                break;
            }
        }
    }

    if ( hasSBBSannotation ) {
        logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
                "values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
        return;
    }

    // Fall back to read data (SAM/BAM) for building the contingency tables.
    // Fix: use isEmpty() rather than size() > 0, and make the local final.
    final boolean hasReads = !toolkit.getReadsDataSource().getReaderIDs().isEmpty();
    if ( hasReads ) {
        logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
        return;
    }

    // Fix: dropped the redundant `new String(...)` wrapper around the literal.
    logger.info("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
}
@Override
// Template method: choose one of the three concrete calculation strategies
// depending on which data are available for this variant context.
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
                                    final AnnotatorCompatible walker,
                                    final ReferenceContext ref,
                                    final Map<String,AlignmentContext> stratifiedContexts,
                                    final VariantContext vc,
                                    final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
    if ( !vc.isVariant() )
        return null;

    // Strategy 1: per-sample SB genotype annotations, if any genotype carries one.
    if ( vc.hasGenotypes() ) {
        for ( final Genotype genotype : vc.getGenotypes() ) {
            if ( genotype.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
                return calculateAnnotationFromGTfield(vc.getGenotypes());
            }
        }
    }

    // Strategy 2: pileup-based table for SNPs. stratifiedContexts can come from
    // VariantAnnotator, but will be empty if no reads were provided.
    if ( vc.isSNP() && stratifiedContexts != null && !stratifiedContexts.isEmpty() ) {
        return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
    }

    // Strategy 3: per-read likelihoods, e.g. from the HaplotypeCaller's call to
    // the VariantAnnotatorEngine.
    if ( stratifiedPerReadAlleleLikelihoodMap != null ) {
        return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
    }

    // Nothing usable: non-SNP variants need per-read likelihoods, while for SNPs
    // the same result can be obtained from a simple pileup — neither is present.
    return null;
}
/**
 * Calculate the strand bias annotation from per-sample SB genotype fields.
 *
 * @param genotypes the genotypes from which to pull the per-sample SB annotations
 * @return the annotation key/value map, or null if it cannot be computed
 */
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);

/**
 * Calculate the strand bias annotation from stratified (per-sample) pileup contexts.
 *
 * @param stratifiedContexts per-sample alignment contexts (pileups)
 * @param vc                 the variant being annotated
 * @return the annotation key/value map, or null if it cannot be computed
 */
protected abstract Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc);

/**
 * Calculate the strand bias annotation from per-read allele likelihoods.
 *
 * @param stratifiedPerReadAlleleLikelihoodMap per-sample, per-read allele likelihoods
 * @param vc                                   the variant being annotated
 * @return the annotation key/value map, or null if it cannot be computed
 */
protected abstract Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc);
/**
* Create the contingency table by retrieving the per-sample strand bias annotation and adding them together
* @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
@ -108,9 +193,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
final int minCount ) {
int[][] table = new int[2][2];
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4];
for (PileupElement p : sample.getValue().getBasePileup()) {
for (final PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
continue;

View File

@ -46,6 +46,8 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
@ -79,38 +81,30 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
private static final String SOR = "SOR";
@Override
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String,AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
if ( !vc.isVariant() )
return null;
if ( vc.hasGenotypes() ) {
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
if ( tableFromPerSampleAnnotations != null ) {
final double ratio = calculateSOR(tableFromPerSampleAnnotations);
return annotationForOneTable(ratio);
}
}
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final double ratio = calculateSOR(tableNoFiltering);
/**
 * Calculate the SOR annotation from the per-sample SB genotype annotations.
 * Fix: this span interleaved leftover lines of the removed annotate() method
 * with the new override; reconstructed as the single clean method, with the
 * parameter made final and @Override added for consistency with the siblings.
 *
 * @param genotypes the genotypes from which to pull the per-sample SB annotations
 * @return the SOR annotation map, or null if no sample had a usable SB annotation
 */
@Override
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
    // Sum per-sample SB tables into one 2x2 strand contingency table.
    final int[][] tableFromPerSampleAnnotations = getTableFromSamples( genotypes, MIN_COUNT );
    if ( tableFromPerSampleAnnotations != null ) {
        final double ratio = calculateSOR(tableFromPerSampleAnnotations);
        return annotationForOneTable(ratio);
    }
    return null;
}
/**
 * Calculate the SOR annotation from stratified (per-sample) pileup contexts.
 * Fix: parameter declared final for consistency with the FisherStrand override
 * and the rest of the class.
 *
 * @param stratifiedContexts per-sample alignment contexts (pileups)
 * @param vc                 the variant being annotated
 * @return the SOR annotation map
 */
@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
                                                                        final VariantContext vc){
    // SOR uses only the unfiltered table (no base-quality filtering pass).
    final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
    final double ratio = calculateSOR(tableNoFiltering);
    return annotationForOneTable(ratio);
}
/**
 * Calculate the SOR annotation from per-read allele likelihoods — used for SNPs
 * with no alignment context, and for indels, where per-read likelihoods are
 * needed to assign reads to alleles.
 * Fix: parameter declared final for consistency with the FisherStrand override
 * and the rest of the class.
 *
 * @param stratifiedPerReadAlleleLikelihoodMap per-sample, per-read allele likelihoods
 * @param vc                                   the variant being annotated
 * @return the SOR annotation map
 */
@Override
protected Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
                                                                   final VariantContext vc){
    final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
    final double ratio = calculateSOR(table);
    return annotationForOneTable(ratio);
}
/**

View File

@ -155,7 +155,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
Arrays.asList("e86d04320dc9a597c1f58ad4b03b33c3"));
executeTest("not passing it any reads", spec);
}
@ -163,7 +163,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
Arrays.asList("bcccbe1c8572d98fa73d824d80ea0a05"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
Arrays.asList("e9d82ed45249833803aad9ebf2a686c2"));
executeTest("adding multiple IDs with dbSNP", spec);
}
@ -179,7 +179,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("cc7184263975595a6e2473d153227146"));
Arrays.asList("2ff3d4bdaa9e0cc2688b26fed601c184"));
executeTest("getting DB tag with HM3", spec);
}
@ -187,7 +187,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithTwoComps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf --comp:foo " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("6afbf05090ae139f53467cf6e0e71cf4"));
Arrays.asList("dbffbec98dc60850a6bde11540e8040c"));
executeTest("getting DB tag with 2 comps", spec);
}
@ -203,7 +203,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
Arrays.asList("fb00c3b512926f8036829e53fd7f369a"));
executeTest("using expression", spec);
}
@ -211,7 +211,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("3de1d1998203518098ffae233f3e2352"));
Arrays.asList("915f72e6cbeb7a77cd2746b4a99a6d64"));
executeTest("using expression with ID", spec);
}