Merge pull request #773 from broadinstitute/rhl_annotator_warning
Make annotators emit a warning if they can't be applied
This commit is contained in:
commit
bcac930aad
|
|
@ -51,13 +51,16 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
|
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||||
import htsjdk.variant.variantcontext.Allele;
|
import htsjdk.variant.variantcontext.Allele;
|
||||||
import htsjdk.variant.variantcontext.Genotype;
|
import htsjdk.variant.variantcontext.Genotype;
|
||||||
|
|
@ -91,6 +94,12 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class DepthPerSampleHC extends GenotypeAnnotation {
|
public class DepthPerSampleHC extends GenotypeAnnotation {
|
||||||
|
private final static Logger logger = Logger.getLogger(DepthPerSampleHC.class);
|
||||||
|
private boolean walkerIdentityCheckWarningLogged = false;
|
||||||
|
private boolean alleleLikelihoodMapWarningLogged = false;
|
||||||
|
private boolean alleleLikelihoodMapSubsetWarningLogged = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
public void annotate(final RefMetaDataTracker tracker,
|
public void annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
|
|
@ -102,22 +111,42 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
|
||||||
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
|
if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (alleleLikelihoodMap == null )
|
if ( !(walker instanceof HaplotypeCaller) ) {
|
||||||
throw new IllegalStateException("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller");
|
if ( !walkerIdentityCheckWarningLogged ) {
|
||||||
|
if ( walker != null )
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller, not " + walker.getClass().getName());
|
||||||
|
else
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from HaplotypeCaller");
|
||||||
|
walkerIdentityCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alleleLikelihoodMap == null ){
|
||||||
|
if ( !alleleLikelihoodMapWarningLogged ) {
|
||||||
|
logger.warn("DepthPerSampleHC can only be used with likelihood based annotations in the HaplotypeCaller");
|
||||||
|
alleleLikelihoodMapWarningLogged = true;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot
|
// the depth for the HC is the sum of the informative alleles at this site. It's not perfect (as we cannot
|
||||||
// differentiate between reads that align over the event but aren't informative vs. those that aren't even
|
// differentiate between reads that align over the event but aren't informative vs. those that aren't even
|
||||||
// close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
|
// close) but it's a pretty good proxy and it matches with the AD field (i.e., sum(AD) = DP).
|
||||||
int dp = 0;
|
int dp = 0;
|
||||||
|
|
||||||
if ( alleleLikelihoodMap.isEmpty() ) {
|
// there are reads
|
||||||
// there are no reads
|
if ( !alleleLikelihoodMap.isEmpty() ) {
|
||||||
} else {
|
|
||||||
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
|
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
|
||||||
|
|
||||||
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
|
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
|
||||||
if ( ! alleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
|
if ( !alleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) {
|
||||||
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet());
|
if ( !alleleLikelihoodMapSubsetWarningLogged ) {
|
||||||
|
logger.warn("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + alleleLikelihoodMap.getAllelesSet());
|
||||||
|
alleleLikelihoodMapSubsetWarningLogged = true;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||||
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
|
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
|
||||||
|
|
@ -130,10 +159,12 @@ public class DepthPerSampleHC extends GenotypeAnnotation {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Collections.singletonList(VCFConstants.DEPTH_KEY);
|
return Collections.singletonList(VCFConstants.DEPTH_KEY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFFormatHeaderLine> getDescriptions() {
|
public List<VCFFormatHeaderLine> getDescriptions() {
|
||||||
return Collections.singletonList(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
|
return Collections.singletonList(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -54,9 +54,9 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
import cern.jet.math.Arithmetic;
|
import cern.jet.math.Arithmetic;
|
||||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||||
|
|
@ -94,7 +94,7 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
||||||
private static final String FS = "FS";
|
private static final String FS = "FS";
|
||||||
private static final double MIN_PVALUE = 1E-320;
|
private static final double MIN_PVALUE = 1E-320;
|
||||||
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
||||||
private static final int MIN_COUNT = 2;
|
private static final int MIN_COUNT = ARRAY_DIM;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
|
protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes){
|
||||||
|
|
@ -154,10 +154,12 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
||||||
return Collections.singletonMap(FS, value);
|
return Collections.singletonMap(FS, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Collections.singletonList(FS);
|
return Collections.singletonList(FS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||||
return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
return Collections.singletonList(new VCFInfoHeaderLine(FS, 1, VCFHeaderLineType.Float, "Phred-scaled p-value using Fisher's exact test to detect strand bias"));
|
||||||
}
|
}
|
||||||
|
|
@ -168,15 +170,19 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
||||||
* @return the array used by the per-sample Strand Bias annotation
|
* @return the array used by the per-sample Strand Bias annotation
|
||||||
*/
|
*/
|
||||||
public static List<Integer> getContingencyArray( final int[][] table ) {
|
public static List<Integer> getContingencyArray( final int[][] table ) {
|
||||||
if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
|
if(table.length != ARRAY_DIM || table[0].length != ARRAY_DIM) {
|
||||||
if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); }
|
logger.warn("Expecting a " + ARRAY_DIM + "x" + ARRAY_DIM + " strand bias table.");
|
||||||
final List<Integer> list = new ArrayList<>(4); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Integer> list = new ArrayList<>(ARRAY_SIZE); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change
|
||||||
list.add(table[0][0]);
|
list.add(table[0][0]);
|
||||||
list.add(table[0][1]);
|
list.add(table[0][1]);
|
||||||
list.add(table[1][0]);
|
list.add(table[1][0]);
|
||||||
list.add(table[1][1]);
|
list.add(table[1][1]);
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Double pValueForContingencyTable(int[][] originalTable) {
|
private Double pValueForContingencyTable(int[][] originalTable) {
|
||||||
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
|
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
|
||||||
|
|
||||||
|
|
@ -231,9 +237,9 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
||||||
|
|
||||||
final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE;
|
final double normalizationFactor = (double)sum / TARGET_TABLE_SIZE;
|
||||||
|
|
||||||
final int[][] normalized = new int[2][2];
|
final int[][] normalized = new int[ARRAY_DIM][ARRAY_DIM];
|
||||||
for ( int i = 0; i < 2; i++ ) {
|
for ( int i = 0; i < ARRAY_DIM; i++ ) {
|
||||||
for ( int j = 0; j < 2; j++ )
|
for ( int j = 0; j < ARRAY_DIM; j++ )
|
||||||
normalized[i][j] = (int)(table[i][j] / normalizationFactor);
|
normalized[i][j] = (int)(table[i][j] / normalizationFactor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -241,10 +247,10 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int [][] copyContingencyTable(int [][] t) {
|
private static int [][] copyContingencyTable(int [][] t) {
|
||||||
int[][] c = new int[2][2];
|
int[][] c = new int[ARRAY_DIM][ARRAY_DIM];
|
||||||
|
|
||||||
for ( int i = 0; i < 2; i++ )
|
for ( int i = 0; i < ARRAY_DIM; i++ )
|
||||||
for ( int j = 0; j < 2; j++ )
|
for ( int j = 0; j < ARRAY_DIM; j++ )
|
||||||
c[i][j] = t[i][j];
|
c[i][j] = t[i][j];
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
|
|
@ -270,21 +276,21 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean rotateTable(int[][] table) {
|
private static boolean rotateTable(int[][] table) {
|
||||||
table[0][0] -= 1;
|
table[0][0]--;
|
||||||
table[1][0] += 1;
|
table[1][0]++;
|
||||||
|
|
||||||
table[0][1] += 1;
|
table[0][1]++;
|
||||||
table[1][1] -= 1;
|
table[1][1]--;
|
||||||
|
|
||||||
return (table[0][0] >= 0 && table[1][1] >= 0);
|
return (table[0][0] >= 0 && table[1][1] >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean unrotateTable(int[][] table) {
|
private static boolean unrotateTable(int[][] table) {
|
||||||
table[0][0] += 1;
|
table[0][0]++;
|
||||||
table[1][0] -= 1;
|
table[1][0]--;
|
||||||
|
|
||||||
table[0][1] -= 1;
|
table[0][1]--;
|
||||||
table[1][1] += 1;
|
table[1][1]++;
|
||||||
|
|
||||||
return (table[0][1] >= 0 && table[1][0] >= 0);
|
return (table[0][1] >= 0 && table[1][0] >= 0);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
|
|
@ -86,17 +87,33 @@ import java.util.*;
|
||||||
* <p>HaplotypeCaller does not output this annotation because it already evaluates haplotype segregation internally. This annotation is only informative (and available) for variants called by Unified Genotyper.</p>
|
* <p>HaplotypeCaller does not output this annotation because it already evaluates haplotype segregation internally. This annotation is only informative (and available) for variants called by Unified Genotyper.</p>
|
||||||
*/
|
*/
|
||||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
|
private final static Logger logger = Logger.getLogger(HaplotypeScore.class);
|
||||||
|
private boolean walkerIdentityCheckWarningLogged = false;
|
||||||
|
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||||
private final static char REGEXP_WILDCARD = '.';
|
private final static char REGEXP_WILDCARD = '.';
|
||||||
|
|
||||||
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
// Can only call from UnifiedGenotyper
|
||||||
|
if ( !(walker instanceof UnifiedGenotyper) ) {
|
||||||
|
if ( !walkerIdentityCheckWarningLogged ) {
|
||||||
|
if ( walker != null )
|
||||||
|
logger.warn("Must be called from UnifiedGenotyper, not " + walker.getClass().getName());
|
||||||
|
else
|
||||||
|
logger.warn("Must be called from UnifiedGenotyper");
|
||||||
|
walkerIdentityCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if (vc.isSNP() && stratifiedContexts != null)
|
if (vc.isSNP() && stratifiedContexts != null)
|
||||||
return annotatePileup(ref, stratifiedContexts, vc);
|
return annotatePileup(ref, stratifiedContexts, vc);
|
||||||
else
|
else
|
||||||
|
|
@ -107,7 +124,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc) {
|
final VariantContext vc) {
|
||||||
|
|
||||||
if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here
|
if (stratifiedContexts.isEmpty()) // empty means that call was made by someone else and we have no data here
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
|
final AlignmentContext context = AlignmentContextUtils.joinContexts(stratifiedContexts.values());
|
||||||
|
|
@ -393,10 +410,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
|
||||||
return mismatches - expected;
|
return mismatches - expected;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Arrays.asList("HaplotypeScore");
|
return Arrays.asList("HaplotypeScore");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||||
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
|
return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes"));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -52,13 +52,14 @@
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
|
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -66,10 +67,7 @@ import htsjdk.variant.variantcontext.Genotype;
|
||||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -82,17 +80,20 @@ import java.util.Map;
|
||||||
*
|
*
|
||||||
* <h3>Caveats</h3>
|
* <h3>Caveats</h3>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>This annotation requires multiple samples and a valid pedigree file.</li>
|
* <li>This annotation requires multiple samples.</li>
|
||||||
* <li>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</li>
|
* <li>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</li>
|
||||||
* <li>Low confidence genotypes are ignored, which may adversely affect HW ratios. More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is heterozygous or homozygous variant.</li>
|
* <li>Low confidence genotypes are ignored, which may adversely affect HW ratios. More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is heterozygous or homozygous variant.</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
|
private final static Logger logger = Logger.getLogger(HardyWeinberg.class);
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
private static final int MIN_GENOTYPE_QUALITY = 10;
|
private static final int MIN_GENOTYPE_QUALITY = 10;
|
||||||
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
||||||
|
private boolean warningLogged = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
|
|
@ -101,8 +102,13 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
final GenotypesContext genotypes = vc.getGenotypes();
|
final GenotypesContext genotypes = vc.getGenotypes();
|
||||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) {
|
||||||
|
if ( !warningLogged ) {
|
||||||
|
logger.warn("Too few genotypes");
|
||||||
|
warningLogged = true;
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
int refCount = 0;
|
int refCount = 0;
|
||||||
int hetCount = 0;
|
int hetCount = 0;
|
||||||
|
|
@ -136,7 +142,9 @@ public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAn
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() { return Arrays.asList("HW"); }
|
public List<String> getKeyNames() { return Arrays.asList("HW"); }
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled p-value for Hardy-Weinberg violation")); }
|
||||||
}
|
}
|
||||||
|
|
@ -52,6 +52,7 @@
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
import htsjdk.variant.variantcontext.Allele;
|
import htsjdk.variant.variantcontext.Allele;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -85,10 +86,12 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
|
|
||||||
|
private final static Logger logger = Logger.getLogger(InbreedingCoeff.class);
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff";
|
private static final String INBREEDING_COEFFICIENT_KEY_NAME = "InbreedingCoeff";
|
||||||
private Set<String> founderIds;
|
private Set<String> founderIds;
|
||||||
private int sampleCount;
|
private int sampleCount;
|
||||||
|
private boolean pedigreeCheckWarningLogged = false;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
|
@ -98,10 +101,20 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||||
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
|
//If available, get the founder IDs and cache them. the IC will only be computed on founders then.
|
||||||
if(founderIds == null && walker != null)
|
if(founderIds == null && walker != null) {
|
||||||
founderIds = ((Walker) walker).getSampleDB().getFounderIds();
|
founderIds = ((Walker) walker).getSampleDB().getFounderIds();
|
||||||
|
}
|
||||||
|
if ( founderIds == null || founderIds.isEmpty() ) {
|
||||||
|
if ( !pedigreeCheckWarningLogged ) {
|
||||||
|
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
|
||||||
|
pedigreeCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
else{
|
||||||
return makeCoeffAnnotation(vc);
|
return makeCoeffAnnotation(vc);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected double calculateIC(final VariantContext vc, final GenotypesContext genotypes) {
|
protected double calculateIC(final VariantContext vc, final GenotypesContext genotypes) {
|
||||||
|
|
||||||
|
|
@ -124,7 +137,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
||||||
{
|
{
|
||||||
if (g.isHetNonRef()) {
|
if (g.isHetNonRef()) {
|
||||||
//all likelihoods go to homCount
|
//all likelihoods go to homCount
|
||||||
homCount += 1;
|
homCount++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -93,9 +93,12 @@ import java.util.*;
|
||||||
|
|
||||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation {
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation {
|
||||||
|
|
||||||
|
private final static Logger logger = Logger.getLogger(MVLikelihoodRatio.class);
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
public static final String MVLR_KEY = "MVLR";
|
public static final String MVLR_KEY = "MVLR";
|
||||||
private Set<Trio> trios;
|
private Set<Trio> trios;
|
||||||
|
private boolean walkerIdentityCheckWarningLogged = false;
|
||||||
|
private boolean pedigreeCheckWarningLogged = false;
|
||||||
|
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
|
|
@ -103,15 +106,31 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
|
// Can only be called from VariantAnnotator
|
||||||
|
if ( !(walker instanceof VariantAnnotator) ) {
|
||||||
|
if ( !walkerIdentityCheckWarningLogged ) {
|
||||||
|
if ( walker != null )
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
|
||||||
|
else
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
|
||||||
|
walkerIdentityCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if ( mendelianViolation == null ) {
|
if ( mendelianViolation == null ) {
|
||||||
|
// Must have a pedigree file
|
||||||
trios = ((Walker) walker).getSampleDB().getTrios();
|
trios = ((Walker) walker).getSampleDB().getTrios();
|
||||||
if ( trios.size() > 0 ) {
|
if ( trios.isEmpty() ) {
|
||||||
|
if ( !pedigreeCheckWarningLogged ) {
|
||||||
|
logger.warn("Annotation will not be calculated, mendelian violation annotation must provide a valid PED file (-ped) from the command line.");
|
||||||
|
pedigreeCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Map<String,Object> attributeMap = new HashMap<String,Object>(1);
|
Map<String,Object> attributeMap = new HashMap<String,Object>(1);
|
||||||
//double pNoMV = 1.0;
|
//double pNoMV = 1.0;
|
||||||
|
|
@ -131,9 +150,11 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiri
|
||||||
return attributeMap;
|
return attributeMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return the descriptions used for the VCF INFO meta field
|
// return the names and descriptions used for the VCF INFO meta field
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
|
public List<String> getKeyNames() { return Arrays.asList(MVLR_KEY); }
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(MVLR_KEY, 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); }
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,20 +51,21 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.gatk.engine.samples.Trio;
|
import org.broadinstitute.gatk.engine.samples.Trio;
|
||||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -93,6 +94,8 @@ import java.util.*;
|
||||||
|
|
||||||
public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringAnnotation, ExperimentalAnnotation {
|
public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringAnnotation, ExperimentalAnnotation {
|
||||||
|
|
||||||
|
private final static Logger logger = Logger.getLogger(PossibleDeNovo.class);
|
||||||
|
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
|
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
|
||||||
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
|
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
|
||||||
|
|
@ -101,6 +104,8 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
||||||
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
|
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
|
||||||
private final int flatNumberOfSamplesCutoff = 4;
|
private final int flatNumberOfSamplesCutoff = 4;
|
||||||
private Set<Trio> trios;
|
private Set<Trio> trios;
|
||||||
|
private boolean walkerIdentityCheckWarningLogged = false;
|
||||||
|
private boolean pedigreeCheckWarningLogged = false;
|
||||||
|
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
|
|
@ -108,15 +113,29 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
|
if ( !(walker instanceof VariantAnnotator ) ) {
|
||||||
|
if ( !walkerIdentityCheckWarningLogged ) {
|
||||||
|
if ( walker != null )
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
|
||||||
|
else
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
|
||||||
|
walkerIdentityCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if ( mendelianViolation == null ) {
|
if ( mendelianViolation == null ) {
|
||||||
trios = ((Walker) walker).getSampleDB().getTrios();
|
trios = ((Walker) walker).getSampleDB().getTrios();
|
||||||
if ( trios.size() > 0 ) {
|
if ( trios.isEmpty() ) {
|
||||||
|
if ( !pedigreeCheckWarningLogged ) {
|
||||||
|
logger.warn("Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
|
||||||
|
pedigreeCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
throw new UserException("Possible de novos annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final Map<String,Object> attributeMap = new HashMap<String,Object>(1);
|
final Map<String,Object> attributeMap = new HashMap<String,Object>(1);
|
||||||
boolean isHighConfDeNovo = false;
|
boolean isHighConfDeNovo = false;
|
||||||
|
|
@ -152,8 +171,10 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
||||||
}
|
}
|
||||||
|
|
||||||
// return the descriptions used for the VCF INFO meta field
|
// return the descriptions used for the VCF INFO meta field
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
|
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
|
||||||
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }
|
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,22 +51,21 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.UnifiedGenotyper;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -83,13 +82,29 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
|
private final static Logger logger = Logger.getLogger(SpanningDeletions.class);
|
||||||
|
private boolean walkerIdentityCheckWarningLogged = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
// Can only call from UnifiedGenotyper
|
||||||
|
if ( !(walker instanceof UnifiedGenotyper) ) {
|
||||||
|
if ( !walkerIdentityCheckWarningLogged ) {
|
||||||
|
if ( walker != null )
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper, not " + walker.getClass().getName());
|
||||||
|
else
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from UnifiedGenotyper");
|
||||||
|
walkerIdentityCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( stratifiedContexts.isEmpty() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// not meaningful when we're at an indel location: deletions that start at location N are by definition called at the position N-1, and at position N-1
|
// not meaningful when we're at an indel location: deletions that start at location N are by definition called at the position N-1, and at position N-1
|
||||||
|
|
@ -111,7 +126,9 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() { return Arrays.asList("Dels"); }
|
public List<String> getKeyNames() { return Arrays.asList("Dels"); }
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Dels", 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Dels", 1, VCFHeaderLineType.Float, "Fraction of Reads Containing Spanning Deletions")); }
|
||||||
}
|
}
|
||||||
|
|
@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
import htsjdk.variant.variantcontext.Allele;
|
import htsjdk.variant.variantcontext.Allele;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
import htsjdk.variant.vcf.VCFConstants;
|
||||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLine;
|
import htsjdk.variant.vcf.VCFHeaderLine;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -77,34 +78,33 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
|
private final static Logger logger = Logger.getLogger(StrandBiasTest.class);
|
||||||
|
private static boolean stratifiedPerReadAlleleLikelihoodMapWarningLogged = false;
|
||||||
|
private static boolean inputVariantContextWarningLogged = false;
|
||||||
|
private static boolean getTableFromSamplesWarningLogged = false;
|
||||||
|
private static boolean decodeSBBSWarningLogged = false;
|
||||||
|
|
||||||
|
protected static final int ARRAY_DIM = 2;
|
||||||
|
protected static final int ARRAY_SIZE = ARRAY_DIM * ARRAY_DIM;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
|
public void initialize(final AnnotatorCompatible walker, final GenomeAnalysisEngine toolkit, final Set<VCFHeaderLine> headerLines) {
|
||||||
boolean hasSBBSannotation = false;
|
// Does the VCF header contain strand bias (SB) by sample annotation?
|
||||||
for ( final VCFHeaderLine line : headerLines) {
|
for ( final VCFHeaderLine line : headerLines) {
|
||||||
if ( line instanceof VCFFormatHeaderLine) {
|
if ( line instanceof VCFFormatHeaderLine) {
|
||||||
final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
|
final VCFFormatHeaderLine formatline = (VCFFormatHeaderLine)line;
|
||||||
if ( formatline.getID().equals(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) {
|
if ( formatline.getID().equals(VCFConstants.STRAND_BIAS_KEY) ) {
|
||||||
hasSBBSannotation = true;
|
logger.warn("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hasSBBSannotation) {
|
|
||||||
logger.info("StrandBiasBySample annotation exists in input VCF header. Attempting to use StrandBiasBySample " +
|
|
||||||
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
|
"values to calculate strand bias annotation values. If no sample has the SB genotype annotation, annotation may still fail.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
boolean hasReads = toolkit.getReadsDataSource().getReaderIDs().size() > 0;
|
|
||||||
if (hasReads) {
|
|
||||||
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
|
// Are there reads from a SAM/BAM file?
|
||||||
|
if (toolkit.getReadsDataSource().getReaderIDs().isEmpty())
|
||||||
|
logger.warn("No StrandBiasBySample annotation or read data was found. Strand bias annotations will not be output.");
|
||||||
|
else
|
||||||
|
logger.info("SAM/BAM data was found. Attempting to use read data to calculate strand bias annotations values.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -115,35 +115,38 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
final Map<String,AlignmentContext> stratifiedContexts,
|
final Map<String,AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
|
// do not process if not a variant
|
||||||
if ( !vc.isVariant() )
|
if ( !vc.isVariant() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
// if the genotype and strand bias are provided, calculate the annotation from the Genotype (GT) field
|
||||||
if ( vc.hasGenotypes() ) {
|
if ( vc.hasGenotypes() ) {
|
||||||
boolean hasSB = false;
|
|
||||||
for (final Genotype g : vc.getGenotypes()) {
|
for (final Genotype g : vc.getGenotypes()) {
|
||||||
if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) {
|
if (g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME)) {
|
||||||
hasSB = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (hasSB)
|
|
||||||
return calculateAnnotationFromGTfield(vc.getGenotypes());
|
return calculateAnnotationFromGTfield(vc.getGenotypes());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//stratifiedContexts can come from VariantAnnotator, but will be size 0 if no reads were provided
|
// if the variant is a snp and has stratified contexts, calculate the annotation from the stratified contexts
|
||||||
if (vc.isSNP() && stratifiedContexts != null && stratifiedContexts.size() > 0) {
|
//stratifiedContexts can come from VariantAnnotator, but will be empty if no reads were provided
|
||||||
|
if (vc.isSNP() && stratifiedContexts != null && !stratifiedContexts.isEmpty()) {
|
||||||
return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
|
return calculateAnnotationFromStratifiedContexts(stratifiedContexts, vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// calculate the annotation from the stratified per read likelihood map
|
||||||
// stratifiedPerReadAllelelikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine
|
// stratifiedPerReadAllelelikelihoodMap can come from HaplotypeCaller call to VariantAnnotatorEngine
|
||||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||||
return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
|
return calculateAnnotationFromLikelihoodMap(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
// for non-snp variants, we need per-read likelihoods.
|
// for non-snp variants, we need per-read likelihoods.
|
||||||
// for snps, we can get same result from simple pileup
|
// for snps, we can get same result from simple pileup
|
||||||
|
// for indels that do not have a computed strand bias (SB) or strand bias by sample (SBBS)
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);
|
protected abstract Map<String, Object> calculateAnnotationFromGTfield(final GenotypesContext genotypes);
|
||||||
|
|
||||||
|
|
@ -161,7 +164,13 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
* @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
|
* @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
|
||||||
*/
|
*/
|
||||||
protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
|
protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
|
||||||
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
|
if( genotypes == null ) {
|
||||||
|
if ( !getTableFromSamplesWarningLogged ) {
|
||||||
|
logger.warn("Genotypes cannot be null.");
|
||||||
|
getTableFromSamplesWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
|
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
|
||||||
boolean foundData = false;
|
boolean foundData = false;
|
||||||
|
|
@ -195,10 +204,10 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
final List<Allele> allAlts,
|
final List<Allele> allAlts,
|
||||||
final int minQScoreToConsider,
|
final int minQScoreToConsider,
|
||||||
final int minCount ) {
|
final int minCount ) {
|
||||||
int[][] table = new int[2][2];
|
int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
|
||||||
|
|
||||||
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
for (final Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||||
final int[] myTable = new int[4];
|
final int[] myTable = new int[ARRAY_SIZE];
|
||||||
for (final PileupElement p : sample.getValue().getBasePileup()) {
|
for (final PileupElement p : sample.getValue().getBasePileup()) {
|
||||||
|
|
||||||
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
|
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
|
||||||
|
|
@ -229,16 +238,28 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final int minCount) {
|
final int minCount) {
|
||||||
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
|
if( stratifiedPerReadAlleleLikelihoodMap == null ) {
|
||||||
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
|
if ( !stratifiedPerReadAlleleLikelihoodMapWarningLogged ) {
|
||||||
|
logger.warn("stratifiedPerReadAlleleLikelihoodMap cannot be null");
|
||||||
|
stratifiedPerReadAlleleLikelihoodMapWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if( vc == null ) {
|
||||||
|
if ( !inputVariantContextWarningLogged ) {
|
||||||
|
logger.warn("input vc cannot be null");
|
||||||
|
inputVariantContextWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final Allele ref = vc.getReference();
|
final Allele ref = vc.getReference();
|
||||||
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
|
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
|
||||||
final List<Allele> allAlts = vc.getAlternateAlleles();
|
final List<Allele> allAlts = vc.getAlternateAlleles();
|
||||||
final int[][] table = new int[2][2];
|
final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
|
||||||
|
|
||||||
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
|
||||||
final int[] myTable = new int[4];
|
final int[] myTable = new int[ARRAY_SIZE];
|
||||||
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
|
||||||
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||||
final GATKSAMRecord read = el.getKey();
|
final GATKSAMRecord read = el.getKey();
|
||||||
|
|
@ -286,7 +307,7 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
final boolean matchesAnyAlt = allAlts.contains(allele);
|
final boolean matchesAnyAlt = allAlts.contains(allele);
|
||||||
|
|
||||||
if ( matchesRef || matchesAnyAlt ) {
|
if ( matchesRef || matchesAnyAlt ) {
|
||||||
final int offset = matchesRef ? 0 : 2;
|
final int offset = matchesRef ? 0 : ARRAY_DIM;
|
||||||
|
|
||||||
if ( read.isStrandless() ) {
|
if ( read.isStrandless() ) {
|
||||||
// a strandless read counts as observations on both strands, at 50% weight, with a minimum of 1
|
// a strandless read counts as observations on both strands, at 50% weight, with a minimum of 1
|
||||||
|
|
@ -320,9 +341,9 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
* @return the array used by the per-sample Strand Bias annotation
|
* @return the array used by the per-sample Strand Bias annotation
|
||||||
*/
|
*/
|
||||||
private static int[] encodeSBBS( final String string ) {
|
private static int[] encodeSBBS( final String string ) {
|
||||||
final int[] array = new int[4];
|
final int[] array = new int[ARRAY_SIZE];
|
||||||
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
|
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
|
||||||
for( int index = 0; index < 4; index++ ) {
|
for( int index = 0; index < ARRAY_SIZE; index++ ) {
|
||||||
array[index] = Integer.parseInt(tokenizer.nextToken());
|
array[index] = Integer.parseInt(tokenizer.nextToken());
|
||||||
}
|
}
|
||||||
return array;
|
return array;
|
||||||
|
|
@ -334,8 +355,14 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||||
* @return the table used by the StrandOddsRatio annotation
|
* @return the table used by the StrandOddsRatio annotation
|
||||||
*/
|
*/
|
||||||
private static int[][] decodeSBBS( final int[] array ) {
|
private static int[][] decodeSBBS( final int[] array ) {
|
||||||
if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
|
if(array.length != ARRAY_SIZE) {
|
||||||
final int[][] table = new int[2][2];
|
if ( !decodeSBBSWarningLogged ) {
|
||||||
|
logger.warn("Expecting a length = " + ARRAY_SIZE + " strand bias array.");
|
||||||
|
decodeSBBSWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final int[][] table = new int[ARRAY_DIM][ARRAY_DIM];
|
||||||
table[0][0] = array[0];
|
table[0][0] = array[0];
|
||||||
table[0][1] = array[1];
|
table[0][1] = array[1];
|
||||||
table[1][0] = array[2];
|
table[1][0] = array[2];
|
||||||
|
|
|
||||||
|
|
@ -162,9 +162,9 @@ public class StrandOddsRatio extends StrandBiasTest implements StandardAnnotatio
|
||||||
* @return the augmented table
|
* @return the augmented table
|
||||||
*/
|
*/
|
||||||
private static double[][] augmentContingencyTable(final int[][] table) {
|
private static double[][] augmentContingencyTable(final int[][] table) {
|
||||||
double[][] augmentedTable = new double[2][2];
|
double[][] augmentedTable = new double[ARRAY_DIM][ARRAY_DIM];
|
||||||
for ( int i = 0; i < 2; i++ ) {
|
for ( int i = 0; i < ARRAY_DIM; i++ ) {
|
||||||
for ( int j = 0; j < 2; j++ )
|
for ( int j = 0; j < ARRAY_DIM; j++ )
|
||||||
augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT;
|
augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -51,12 +51,15 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller;
|
||||||
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||||
|
|
@ -65,10 +68,7 @@ import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tandem repeat unit composition and counts per allele
|
* Tandem repeat unit composition and counts per allele
|
||||||
|
|
@ -84,15 +84,29 @@ import java.util.Map;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
|
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
private final static Logger logger = Logger.getLogger(TandemRepeatAnnotator.class);
|
||||||
private static final String STR_PRESENT = "STR";
|
private static final String STR_PRESENT = "STR";
|
||||||
private static final String REPEAT_UNIT_KEY = "RU";
|
private static final String REPEAT_UNIT_KEY = "RU";
|
||||||
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
|
private static final String REPEATS_PER_ALLELE_KEY = "RPA";
|
||||||
|
private boolean walkerIdentityCheckWarningLogged = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) throws UserException {
|
||||||
|
|
||||||
|
// Can not be called from HaplotypeCaller
|
||||||
|
if ( walker instanceof HaplotypeCaller ) {
|
||||||
|
if ( !walkerIdentityCheckWarningLogged ) {
|
||||||
|
logger.warn("Annotation will not be calculated, can not be called from HaplotypeCaller");
|
||||||
|
walkerIdentityCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if ( !vc.isIndel())
|
if ( !vc.isIndel())
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -117,10 +131,12 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
||||||
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
|
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
|
||||||
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Arrays.asList(keyNames);
|
return Arrays.asList(keyNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -63,7 +64,6 @@ import org.broadinstitute.gatk.utils.MathUtils;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -87,21 +87,41 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation {
|
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation {
|
||||||
|
private final static Logger logger = Logger.getLogger(TransmissionDisequilibriumTest.class);
|
||||||
private Set<Sample> trios = null;
|
private Set<Sample> trios = null;
|
||||||
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
|
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
|
||||||
|
private boolean walkerIdentityCheckWarningLogged = false;
|
||||||
|
private boolean pedigreeCheckWarningLogged = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap){
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap){
|
||||||
|
|
||||||
|
// Can only be called from VariantAnnotator
|
||||||
|
if ( !(walker instanceof VariantAnnotator) ) {
|
||||||
|
if ( !walkerIdentityCheckWarningLogged ) {
|
||||||
|
if ( walker != null )
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
|
||||||
|
else
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
|
||||||
|
walkerIdentityCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get trios from the input pedigree file.
|
||||||
if ( trios == null ) {
|
if ( trios == null ) {
|
||||||
if ( walker instanceof VariantAnnotator ) {
|
|
||||||
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
|
trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents();
|
||||||
} else {
|
if (trios == null || trios.isEmpty()) {
|
||||||
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
|
if ( !pedigreeCheckWarningLogged ) {
|
||||||
|
logger.warn("Transmission disequilibrium test annotation requires a valid ped file be passed in.");
|
||||||
|
pedigreeCheckWarningLogged = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -125,8 +145,10 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
||||||
}
|
}
|
||||||
|
|
||||||
// return the descriptions used for the VCF INFO meta field
|
// return the descriptions used for the VCF INFO meta field
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
|
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
||||||
|
|
||||||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,6 @@ import htsjdk.tribble.readers.LineIterator;
|
||||||
import htsjdk.tribble.readers.PositionalBufferedStream;
|
import htsjdk.tribble.readers.PositionalBufferedStream;
|
||||||
import htsjdk.variant.variantcontext.Genotype;
|
import htsjdk.variant.variantcontext.Genotype;
|
||||||
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
import htsjdk.variant.vcf.VCFCodec;
|
import htsjdk.variant.vcf.VCFCodec;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
@ -95,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsAsking1() {
|
public void testHasAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("4f7ebd519451a776c1aa61493ff33943"));
|
Arrays.asList("92eb47332dd9d7ee7fbe3120dc39c594"));
|
||||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -103,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testHasAnnotsAsking2() {
|
public void testHasAnnotsAsking2() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("8cd16a59e4697beb1c6d75d0b82c8cf5"));
|
Arrays.asList("c367bf7cebd7b26305f8d4736788aec8"));
|
||||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -129,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsAsking1() {
|
public void testNoAnnotsAsking1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("a4df0258a61170c74c85b3cd516c8153"));
|
Arrays.asList("098dcad8d90d90391755a0191c9db59c"));
|
||||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -137,7 +136,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testNoAnnotsAsking2() {
|
public void testNoAnnotsAsking2() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||||
Arrays.asList("1554af900d1caee1d85824ee85e54398"));
|
Arrays.asList("f3bbfbc179d2e1bae49890f1e9dfde34"));
|
||||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -145,7 +144,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testExcludeAnnotations() {
|
public void testExcludeAnnotations() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("11935c8d5cc5a170d06f0b624b31079f"));
|
Arrays.asList("7267450fc4d002f75a24ca17278e0950"));
|
||||||
executeTest("test exclude annotations", spec);
|
executeTest("test exclude annotations", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -153,7 +152,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
public void testOverwritingHeader() {
|
public void testOverwritingHeader() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||||
Arrays.asList("06b4127795a67bd26156cc1651f3a98b"));
|
Arrays.asList("18592c72d83ee84e1326acb999518c38"));
|
||||||
executeTest("test overwriting header", spec);
|
executeTest("test overwriting header", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -271,7 +270,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
"--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " +
|
"--snpEffFile " + privateTestDir + "snpEff_unsupported_version_no_gatk_mode.vcf " +
|
||||||
"-L 1:10001292-10012424",
|
"-L 1:10001292-10012424",
|
||||||
1,
|
1,
|
||||||
UserException.class
|
Arrays.asList("87cbf53c65ef4498b721f901f87f0161")
|
||||||
);
|
);
|
||||||
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
|
executeTest("Testing SnpEff annotations (unsupported version, no GATK mode)", spec);
|
||||||
}
|
}
|
||||||
|
|
@ -309,10 +308,13 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStrandBiasBySample() throws IOException {
|
public void testStrandBiasBySample() throws IOException {
|
||||||
|
// pipeline 1: create variant via HaplotypeCaller with no default annotations
|
||||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
|
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800";
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList(""));
|
||||||
final File outputVCF = executeTest("testStrandBiasBySample", spec).getFirst().get(0);
|
final File outputVCF = executeTest("testStrandBiasBySample", spec).getFirst().get(0);
|
||||||
|
|
||||||
|
// pipeline 2: create variant via HaplotypeCaller; include StrandBiasBySample, exclude FisherStrand annotation
|
||||||
|
// re-Annotate the variant with VariantAnnotator using FisherStrand annotation
|
||||||
final String baseNoFS = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA FisherStrand -A StrandBiasBySample";
|
final String baseNoFS = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, CEUTRIO_BAM) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -XA FisherStrand -A StrandBiasBySample";
|
||||||
final WalkerTestSpec specNoFS = new WalkerTestSpec(baseNoFS, 1, Arrays.asList(""));
|
final WalkerTestSpec specNoFS = new WalkerTestSpec(baseNoFS, 1, Arrays.asList(""));
|
||||||
specNoFS.disableShadowBCF();
|
specNoFS.disableShadowBCF();
|
||||||
|
|
|
||||||
|
|
@ -310,7 +310,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +
|
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +
|
||||||
"-A SnpEff",
|
"-A SnpEff",
|
||||||
1,
|
1,
|
||||||
UserException.class);
|
Arrays.asList("037ce3364668ee6527fba80c4f4bff95"));
|
||||||
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
|
executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "846af1842d2d42e43ce87583d227667d");
|
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "dc7906ed73dc071162c98e4bbe77df44");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||||
|
|
|
||||||
|
|
@ -367,7 +367,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
|
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
|
||||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
|
||||||
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
|
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("ae2d947d3ba3b139cc99efa877c4785c"));
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("9fa83f82ba63729edd8696e82bfeea49"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
|
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.gatk.utils.Utils;
|
import org.broadinstitute.gatk.utils.Utils;
|
||||||
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
import org.broadinstitute.gatk.engine.GATKVCFUtils;
|
||||||
import htsjdk.variant.vcf.*;
|
import htsjdk.variant.vcf.*;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|
||||||
import htsjdk.variant.variantcontext.VariantContext;
|
import htsjdk.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -58,6 +57,8 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
|
|
||||||
private static Logger logger = Logger.getLogger(SnpEff.class);
|
private static Logger logger = Logger.getLogger(SnpEff.class);
|
||||||
|
|
||||||
|
private boolean canAnnotate = true;
|
||||||
|
|
||||||
// We refuse to parse SnpEff output files generated by unsupported versions, or
|
// We refuse to parse SnpEff output files generated by unsupported versions, or
|
||||||
// lacking a SnpEff version number in the VCF header:
|
// lacking a SnpEff version number in the VCF header:
|
||||||
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" };
|
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" };
|
||||||
|
|
@ -209,10 +210,15 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
||||||
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
|
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
|
||||||
// without providing a SnpEff rod via --snpEffFile):
|
// without providing a SnpEff rod via --snpEffFile):
|
||||||
validateRodBinding(walker.getSnpEffRodBinding());
|
if ( !isValidRodBinding(walker.getSnpEffRodBinding()) ) {
|
||||||
|
canAnnotate = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||||
|
|
||||||
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
|
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
|
||||||
|
|
@ -221,21 +227,40 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
||||||
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
|
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
|
||||||
|
|
||||||
checkSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine);
|
if ( !isValidSnpEffVersionAndCommandLine(snpEffVersionLine, snpEffCommandLine) ) {
|
||||||
|
canAnnotate = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// If everything looks ok, add the SnpEff version number and command-line header lines to the
|
// If everything looks ok, add the SnpEff version number and command-line header lines to the
|
||||||
// header of the VCF output file, changing the key names so that our output file won't be
|
// header of the VCF output file, changing the key names so that our output file won't be
|
||||||
// mistaken in the future for a SnpEff output file:
|
// mistaken in the future for a SnpEff output file:
|
||||||
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
|
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
|
||||||
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
||||||
|
|
||||||
|
// Can only be called from VariantAnnotator
|
||||||
|
if ( !(walker instanceof VariantAnnotator) ) {
|
||||||
|
if ( walker != null )
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator, not " + walker.getClass().getName());
|
||||||
|
else
|
||||||
|
logger.warn("Annotation will not be calculated, must be called from VariantAnnotator");
|
||||||
|
canAnnotate = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
final Map<String, AlignmentContext> stratifiedContexts,
|
final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||||
|
|
||||||
|
// Cannot annotate if the initialization checks failed
|
||||||
|
if ( !canAnnotate )
|
||||||
|
return null;
|
||||||
|
|
||||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||||
|
|
||||||
// Get only SnpEff records that start at this locus, not merely span it:
|
// Get only SnpEff records that start at this locus, not merely span it:
|
||||||
|
|
@ -251,7 +276,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
|
|
||||||
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
|
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
|
||||||
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
|
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
|
||||||
if ( effects.size() == 0 ) {
|
if ( effects.isEmpty() ) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -260,35 +285,42 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
return mostSignificantEffect.getAnnotations();
|
return mostSignificantEffect.getAnnotations();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
|
private boolean isValidRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
|
||||||
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
|
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
|
||||||
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
|
logger.warn("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
|
||||||
"as a rodbinding on the command line via the --snpEffFile option, but " +
|
"as a rodbinding on the command line via the --snpEffFile option, but " +
|
||||||
"no SnpEff rodbinding was found.");
|
"no SnpEff rodbinding was found.");
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ) {
|
private boolean isValidSnpEffVersionAndCommandLine( final VCFHeaderLine snpEffVersionLine, final VCFHeaderLine snpEffCommandLine ){
|
||||||
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
|
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
|
||||||
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
|
logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
|
||||||
"and so could not verify that the file was generated by a supported version of SnpEff (%s)",
|
"and so could not verify that the file was generated by a supported version of SnpEff (%s)",
|
||||||
SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString()));
|
SNPEFF_VCF_HEADER_VERSION_LINE_KEY, supportedSnpEffVersionsString()));
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
|
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
|
||||||
throw new UserException(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
|
logger.warn(String.format("Could not find a %s entry in the VCF header for the SnpEff input file, " +
|
||||||
"which should be added by all supported versions of SnpEff (%s)",
|
"which should be added by all supported versions of SnpEff (%s)",
|
||||||
SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, supportedSnpEffVersionsString()));
|
SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, supportedSnpEffVersionsString()));
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
|
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
|
||||||
|
|
||||||
if ( ! isSupportedSnpEffVersion(snpEffVersionString, snpEffCommandLine.getValue()) ) {
|
if ( ! isSupportedSnpEffVersion(snpEffVersionString, snpEffCommandLine.getValue()) ) {
|
||||||
throw new UserException(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
|
logger.warn(String.format("The version of SnpEff used to generate the SnpEff input file (%s) " +
|
||||||
"is not currently supported by the GATK, and was not run in GATK " +
|
"is not currently supported by the GATK, and was not run in GATK " +
|
||||||
"compatibility mode. Supported versions are: %s",
|
"compatibility mode. Supported versions are: %s",
|
||||||
snpEffVersionString, supportedSnpEffVersionsString()));
|
snpEffVersionString, supportedSnpEffVersionsString()));
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isSupportedSnpEffVersion( final String versionString, final String commandLine ) {
|
private boolean isSupportedSnpEffVersion( final String versionString, final String commandLine ) {
|
||||||
|
|
@ -377,6 +409,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
return mostSignificantEffect;
|
return mostSignificantEffect;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
|
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
|
||||||
InfoFieldKey.IMPACT_KEY.getKeyName(),
|
InfoFieldKey.IMPACT_KEY.getKeyName(),
|
||||||
|
|
@ -390,6 +423,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||||
return Arrays.asList(
|
return Arrays.asList(
|
||||||
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue