diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java index 95cf3e593..8b093d4d5 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFConstants.java @@ -51,10 +51,14 @@ public final class GATKVCFConstants { public static final String SPANNING_DELETIONS_KEY = "Dels"; public static final String ORIGINAL_DP_KEY = "DP_Orig"; //SelectVariants public static final String DOWNSAMPLED_KEY = "DS"; + public static final String EVENT_COUNT_IN_HAPLOTYPE_KEY = "ECNT"; //M2 + public static final String EVENT_DISTANCE_MAX_KEY = "MAX_ED"; //M2 + public static final String EVENT_DISTANCE_MIN_KEY = "MIN_ED"; //M2 public static final String FISHER_STRAND_KEY = "FS"; public static final String GC_CONTENT_KEY = "GC"; public static final String GQ_MEAN_KEY = "GQ_MEAN"; public static final String GQ_STDEV_KEY = "GQ_STDDEV"; + public static final String HAPLOTYPE_COUNT_KEY = "HCNT"; //M2 public static final String HAPLOTYPE_SCORE_KEY = "HaplotypeScore"; public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo"; public static final String HOMOPOLYMER_RUN_KEY = "HRun"; @@ -80,8 +84,10 @@ public final class GATKVCFConstants { public static final String ORIGINAL_CONTIG_KEY = "OriginalChr"; //LiftoverVariants public static final String ORIGINAL_START_KEY = "OriginalStart"; //LiftoverVariants public static final String N_BASE_COUNT_KEY = "PercentNBase"; + public static final String NORMAL_LOD_KEY = "NLOD"; //M2 public static final String RBP_INCONSISTENT_KEY = "PhasingInconsistent"; //ReadBackedPhasing public static final String GENOTYPE_PRIOR_KEY = "PG"; + public static final String PANEL_OF_NORMALS_COUNT_KEY = "PON"; //M2 public static final String POSITIVE_LABEL_KEY = "POSITIVE_TRAIN_SITE"; public static final 
String QUAL_BY_DEPTH_KEY = "QD"; public static final String BEAGLE_R2_KEY = "R2"; //BeagleOutputToVCF @@ -93,11 +99,13 @@ public final class GATKVCFConstants { public static final String STRAND_ODDS_RATIO_KEY = "SOR"; public static final String STR_PRESENT_KEY = "STR"; public static final String TRANSMISSION_DISEQUILIBRIUM_KEY = "TDT"; + public static final String TUMOR_LOD_KEY = "TLOD"; //M2 public static final String VARIANT_TYPE_KEY = "VariantType"; public static final String VQS_LOD_KEY = "VQSLOD"; //FORMAT keys public static final String ALLELE_BALANCE_KEY = "AB"; + public static final String ALLELE_FRACTION_KEY = "AF"; //M2 public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL"; public static final String RBP_HAPLOTYPE_KEY = "HP"; //ReadBackedPhasing public static final String AVG_INTERVAL_DP_BY_SAMPLE_KEY = "IDP"; //DiagnoseTargets @@ -110,6 +118,7 @@ public final class GATKVCFConstants { public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT"; public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID"; public static final String PHRED_SCALED_POSTERIORS_KEY = "PP"; //FamilyLikelihoodsUtils / PosteriorLikelihoodsUtils + public static final String QUALITY_SCORE_SUM_KEY = "QSS"; //M2 public static final String REFERENCE_GENOTYPE_QUALITY = "RGQ"; public static final String STRAND_COUNT_BY_SAMPLE_KEY = "SAC"; public static final String STRAND_BIAS_BY_SAMPLE_KEY = "SB"; @@ -120,8 +129,16 @@ public final class GATKVCFConstants { /* Note that many filters used throughout GATK (most notably in VariantRecalibration) are dynamic, their names (or descriptions) depend on some threshold. 
Those filters are not included here */ - public static final String BEAGLE_MONO_FILTER_NAME = "BGL_SET_TO_MONOMORPHIC"; - public static final String LOW_QUAL_FILTER_NAME = "LowQual"; + public static final String ALT_ALLELE_IN_NORMAL_FILTER_NAME = "alt_allele_in_normal"; //M2 + public static final String BEAGLE_MONO_FILTER_NAME = "BGL_SET_TO_MONOMORPHIC"; + public static final String CLUSTERED_EVENTS_FILTER_NAME = "clustered_events"; //M2 + public static final String GERMLINE_RISK_FILTER_NAME = "germline_risk"; //M2 + public static final String HOMOLOGOUS_MAPPING_EVENT_FILTER_NAME = "homologous_mapping_event"; //M2 + public static final String LOW_QUAL_FILTER_NAME = "LowQual"; + public static final String MULTI_EVENT_ALT_ALLELE_IN_NORMAL_FILTER_NAME = "multi_event_alt_allele_in_normal"; //M2 + public static final String PON_FILTER_NAME = "panel_of_normals"; //M2 + public static final String STR_CONTRACTION_FILTER_NAME = "str_contraction"; //M2 + public static final String TUMOR_LOD_FILTER_NAME = "t_lod_fstar"; //M2 // Symbolic alleles public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT"; diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java index 89b9510d2..6dc7a2122 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVCFHeaderLines.java @@ -25,16 +25,11 @@ package org.broadinstitute.gatk.utils.variant; -import htsjdk.variant.vcf.VCFFilterHeaderLine; -import htsjdk.variant.vcf.VCFFormatHeaderLine; -import htsjdk.variant.vcf.VCFHeaderLineCount; -import htsjdk.variant.vcf.VCFHeaderLineType; -import htsjdk.variant.vcf.VCFInfoHeaderLine; +import htsjdk.variant.vcf.*; import static org.broadinstitute.gatk.utils.variant.GATKVCFConstants.*; -import java.util.HashMap; -import 
java.util.Map; +import java.util.*; /** * This class contains the VCFHeaderLine definitions for the annotation keys in GATKVCFConstants. @@ -66,6 +61,16 @@ public class GATKVCFHeaderLines { addFilterLine(new VCFFilterHeaderLine(LOW_QUAL_FILTER_NAME, "Low quality")); addFilterLine(new VCFFilterHeaderLine(BEAGLE_MONO_FILTER_NAME, "This site was set to monomorphic by Beagle")); + // M2-related filters + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.ALT_ALLELE_IN_NORMAL_FILTER_NAME, "Evidence seen in the normal sample")); + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.CLUSTERED_EVENTS_FILTER_NAME, "Clustered events observed in the tumor ")); + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.GERMLINE_RISK_FILTER_NAME, "Evidence indicates this site is germline, not somatic")); + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.HOMOLOGOUS_MAPPING_EVENT_FILTER_NAME, "More than three events were observed in the tumor")); + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.MULTI_EVENT_ALT_ALLELE_IN_NORMAL_FILTER_NAME, "Multiple events observed in tumor and normal")); + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.PON_FILTER_NAME, "Seen in at least 2 samples in the panel of normals")); + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.TUMOR_LOD_FILTER_NAME, "Tumor does not meet likelihood threshold")); + addFilterLine(new VCFFilterHeaderLine(GATKVCFConstants.STR_CONTRACTION_FILTER_NAME, "Site filtered due to contraction of short repeat region")); + addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype")); addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); addFormatLine(new VCFFormatHeaderLine(MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the 
same order as listed, for each individual sample")); @@ -89,6 +94,10 @@ public class GATKVCFHeaderLines { addFormatLine(new VCFFormatHeaderLine(JOINT_POSTERIOR_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint posterior probability of the genotype combination (after applying family priors)")); addFormatLine(new VCFFormatHeaderLine(ORIGINAL_GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Original Genotype input to Beagle")); + // M2-related format lines + addFormatLine(new VCFFormatHeaderLine(GATKVCFConstants.ALLELE_FRACTION_KEY, 1, VCFHeaderLineType.Float, "Allele fraction of the event in the tumor")); + + addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed")); addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed")); addInfoLine(new VCFInfoHeaderLine(DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?")); @@ -147,5 +156,15 @@ public class GATKVCFHeaderLines { addInfoLine(new VCFInfoHeaderLine(BEAGLE_AC_COMP_KEY, 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); addInfoLine(new VCFInfoHeaderLine(BEAGLE_AF_COMP_KEY, 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); addInfoLine(new VCFInfoHeaderLine(BEAGLE_AN_COMP_KEY, 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); + + // M2-related info lines + addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.EVENT_COUNT_IN_HAPLOTYPE_KEY, 1, VCFHeaderLineType.String, "Number of events in this haplotype")); + addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.EVENT_DISTANCE_MAX_KEY, 1, 
VCFHeaderLineType.Integer, "Maximum distance between events in this active region")); + addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.EVENT_DISTANCE_MIN_KEY, 1, VCFHeaderLineType.Integer, "Minimum distance between events in this active region")); + addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.HAPLOTYPE_COUNT_KEY, 1, VCFHeaderLineType.String, "Number of haplotypes that support this variant")); + addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.NORMAL_LOD_KEY, 1, VCFHeaderLineType.String, "Normal LOD score")); + addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.PANEL_OF_NORMALS_COUNT_KEY, 1, VCFHeaderLineType.String, "Count from Panel of Normals")); + addInfoLine(new VCFInfoHeaderLine(GATKVCFConstants.TUMOR_LOD_KEY, 1, VCFHeaderLineType.String, "Tumor LOD score")); + } }