Refactor StrandBiasTest (using template method) and add warnings for when annotations may not be calculated successfully.
VariantAnnotator/FS behavior changes slightly: VA used to output zeros for FS if there was no strand bias info, now skips FS output (but will still show FS in header)
This commit is contained in:
parent
86cb88e121
commit
b512c7eac9
|
|
@ -47,6 +47,8 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import cern.jet.math.Arithmetic;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
|
|
@ -87,42 +89,34 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
|||
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
||||
private static final int MIN_COUNT = 2;
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
if ( vc.hasGenotypes() ) {
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
|
||||
if ( tableFromPerSampleAnnotations != null ) {
|
||||
return pValueForBestTable(tableFromPerSampleAnnotations, null);
|
||||
}
|
||||
}
|
||||
|
||||
if (vc.isSNP() && stratifiedContexts != null) {
|
||||
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
|
||||
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
|
||||
printTable("unfiltered", tableNoFiltering);
|
||||
printTable("filtered", tableFiltering);
|
||||
return pValueForBestTable(tableFiltering, tableNoFiltering);
|
||||
}
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
|
||||
//logger.info("VC " + vc);
|
||||
//printTable(table, 0.0);
|
||||
return pValueForBestTable(table, null);
|
||||
}
|
||||
else
|
||||
// for non-snp variants, we need per-read likelihoods.
|
||||
// for snps, we can get same result from simple pileup
|
||||
return null;
|
||||
@Override
|
||||
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( genotypes, MIN_COUNT );
|
||||
return ( tableFromPerSampleAnnotations != null )? pValueForBestTable(tableFromPerSampleAnnotations, null) : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc){
|
||||
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
|
||||
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
|
||||
printTable("unfiltered", tableNoFiltering);
|
||||
printTable("filtered", tableFiltering);
|
||||
return pValueForBestTable(tableFiltering, tableNoFiltering);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc){
|
||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
|
||||
//logger.info("VC " + vc);
|
||||
//printTable(table, 0.0);
|
||||
return pValueForBestTable(table, null);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
|
||||
*
|
||||
|
|
|
|||
|
|
@ -48,7 +48,15 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
|
|||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
|
|
@ -64,6 +72,83 @@ import java.util.*;
|
|||
* Class of tests to detect strand bias.
|
||||
*/
|
||||
public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||
private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
|
||||
|
||||
@Override
|
||||
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
|
||||
boolean hasSBBSannotation = false;
|
||||
for ( final VCFHeaderLine line : headerLines) {
|
||||
if ( line instanceof VCFFormatHeaderLine) {
|
||||
final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
|
||||
if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
|
||||
hasSBBSannotation = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasSBBSannotation) {
|
||||
logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
|
||||
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
|
||||
return;
|
||||
}
|
||||
|
||||
boolean hasReads = toolkit.getReadsDataSource().getReaderIDs().size() > 0;
|
||||
if (hasReads) {
|
||||
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(new String("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output."));
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
//template method for calculating strand bias annotations using the three different methods
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String,AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
if ( vc.hasGenotypes() ) {
|
||||
boolean hasSB = false;
|
||||
for (final Genotype g : vc.getGenotypes()) {
|
||||
if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) {
|
||||
hasSB = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hasSB)
|
||||
return calculateAnnotationFromGTfield(vc.getGenotypes());
|
||||
}
|
||||
|
||||
//stratifiedContexts can come come from VariantAnnotator, but will be size 0 if no reads were provided
|
||||
if (vc.isSNP() && stratifiedContexts != null && stratifiedContexts.size() > 0) {
|
||||
return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
|
||||
}
|
||||
|
||||
//stratifiedPerReadAllelelikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||
return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||
}
|
||||
else
|
||||
// for non-snp variants, we need per-read likelihoods.
|
||||
// for snps, we can get same result from simple pileup
|
||||
return null;
|
||||
}
|
||||
|
||||
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);
|
||||
|
||||
protected abstract Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc);
|
||||
|
||||
protected abstract Map<String, Object> calculateAnnotationFromLikelihoodMap(final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc);
|
||||
|
||||
/**
|
||||
* Create the contingency table by retrieving the per-sample strand bias annotation and adding them together
|
||||
* @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
|
||||
|
|
@ -108,9 +193,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
|||
final int minCount ) {
|
||||
int[][] table = new int[2][2];
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||
final int[] myTable = new int[4];
|
||||
for (PileupElement p : sample.getValue().getBasePileup()) {
|
||||
for (final PileupElement p : sample.getValue().getBasePileup()) {
|
||||
|
||||
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
|
|
@ -79,38 +81,30 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
|
|||
private static final String SOR = "SOR";
|
||||
|
||||
@Override
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String,AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
if ( vc.hasGenotypes() ) {
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
|
||||
if ( tableFromPerSampleAnnotations != null ) {
|
||||
final double ratio = calculateSOR(tableFromPerSampleAnnotations);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
}
|
||||
|
||||
if (vc.isSNP() && stratifiedContexts != null) {
|
||||
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
|
||||
final double ratio = calculateSOR(tableNoFiltering);
|
||||
protected Map<String, Object> calculateAnnotationFromGTfield(GenotypesContext genotypes){
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( genotypes, MIN_COUNT );
|
||||
if ( tableFromPerSampleAnnotations != null ) {
|
||||
final double ratio = calculateSOR(tableFromPerSampleAnnotations);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
|
||||
final double ratio = calculateSOR(table);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
else
|
||||
// for non-snp variants, we need per-read likelihoods.
|
||||
// for snps, we can get same result from simple pileup
|
||||
return null;
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc){
|
||||
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
|
||||
final double ratio = calculateSOR(tableNoFiltering);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Map<String, Object> calculateAnnotationFromLikelihoodMap(Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||
final VariantContext vc){
|
||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
|
||||
final double ratio = calculateSOR(table);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -155,7 +155,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
|
||||
Arrays.asList("e86d04320dc9a597c1f58ad4b03b33c3"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -163,7 +163,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
|
||||
Arrays.asList("bcccbe1c8572d98fa73d824d80ea0a05"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testMultipleIdsWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
|
||||
Arrays.asList("e9d82ed45249833803aad9ebf2a686c2"));
|
||||
executeTest("adding multiple IDs with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -179,7 +179,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("cc7184263975595a6e2473d153227146"));
|
||||
Arrays.asList("2ff3d4bdaa9e0cc2688b26fed601c184"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -187,7 +187,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithTwoComps() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf --comp:foo " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("6afbf05090ae139f53467cf6e0e71cf4"));
|
||||
Arrays.asList("dbffbec98dc60850a6bde11540e8040c"));
|
||||
executeTest("getting DB tag with 2 comps", spec);
|
||||
}
|
||||
|
||||
|
|
@ -203,7 +203,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
|
||||
Arrays.asList("fb00c3b512926f8036829e53fd7f369a"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
|
|
@ -211,7 +211,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("3de1d1998203518098ffae233f3e2352"));
|
||||
Arrays.asList("915f72e6cbeb7a77cd2746b4a99a6d64"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue