Added the RGQ format annotation to monomorphic sites in the VCF output of GenotypeGVCFs.
Now, instead of stripping out the GQs for mono sites, we transfer them to the RGQ. This is extremely useful for people who want to know how confident the hom ref genotype calls are. Perhaps this is just what CRSP needs for pertinent negatives. Note that I also changed the tool to no longer use the GenotypeSummaries annotation by default since it was adding some seemingly unnecessary annotations (like mean GQ now that we keep the GQ around and number of no-calls). Let me know if this was a mistake (although Laura gave me a thumbs up).
This commit is contained in:
parent
f5ec870964
commit
1ff9463285
|
|
@ -60,7 +60,6 @@ import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
|
|||
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.genotyper.IndexedSampleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleListUtils;
|
||||
|
|
@ -154,7 +153,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to recompute. The single value 'none' removes the default annotations", required=false)
|
||||
protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"InbreedingCoeff", "FisherStrand", "QualByDepth", "ChromosomeCounts", "GenotypeSummaries", "StrandOddsRatio"}));
|
||||
protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"InbreedingCoeff", "FisherStrand", "QualByDepth", "ChromosomeCounts", "StrandOddsRatio"}));
|
||||
|
||||
/**
|
||||
* The rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. Note that dbSNP is not used in any way for the calculations themselves.
|
||||
|
|
@ -215,6 +214,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
// add the pool values for each genotype
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
|
||||
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
|
||||
|
||||
|
|
@ -311,6 +311,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
* 2. propagate DP to AD if not present
|
||||
* 3. remove SB if present
|
||||
* 4. change the PGT value from "0|1" to "1|1" for homozygous variant genotypes
|
||||
* 5. move GQ to RGQ if the site is monomorphic
|
||||
*
|
||||
* @param VC the VariantContext with the Genotypes to fix
|
||||
* @param createRefGTs if true we will also create proper hom ref genotypes since we assume the site is monomorphic
|
||||
|
|
@ -332,6 +333,12 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
attrs.remove("MIN_DP");
|
||||
}
|
||||
|
||||
// move the GQ to RGQ
|
||||
if ( createRefGTs && oldGT.hasGQ() ) {
|
||||
builder.noGQ();
|
||||
attrs.put(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY, oldGT.getGQ());
|
||||
}
|
||||
|
||||
// remove SB
|
||||
attrs.remove("SB");
|
||||
|
||||
|
|
|
|||
|
|
@ -340,7 +340,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
: genotypeIndexMapsByPloidy[ploidy];
|
||||
final int[] PLs = generatePL(g, genotypeIndexMapByPloidy);
|
||||
final int[] AD = g.hasAD() ? generateAD(g.getAD(), perSampleIndexesOfRelevantAlleles) : null;
|
||||
genotypeBuilder.PL(PLs).AD(AD).noGQ();
|
||||
genotypeBuilder.PL(PLs).AD(AD);
|
||||
}
|
||||
mergedGenotypes.add(genotypeBuilder.make());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
|
||||
1,
|
||||
Arrays.asList("20f55be01d01bed48bf66f354fa72e5b"));
|
||||
Arrays.asList("7b3153135e4f8e1d137d3f4beb46f182"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
|
||||
1,
|
||||
Arrays.asList("c8bf3da5eb641d0082bdd5f12ea58e1e"));
|
||||
Arrays.asList("4f546634213ece6f08ec9258620b92bb"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -190,7 +190,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMD5s() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("1df56fdfc71729cc82ba5dbfc75a72c4"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("b7c753452ab0c05f9cee538e420b87fa"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMD5s", spec);
|
||||
}
|
||||
|
|
@ -198,7 +198,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBasepairResolutionOutput() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("9c8fc4d9e330fbe41a00b7f71a784f4e"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("bb6420ead95da4c72e76ca4bf5860ef0"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBasepairResolutionOutput", spec);
|
||||
}
|
||||
|
|
@ -206,7 +206,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBreakBlocks() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791 --breakBandsAtMultiplesOf 5");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("3a8e53b8b590eaa2675149ceccb80a7a"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("dd31182124c4b78a8a03edb1e0cf618b"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBreakBlocks", spec);
|
||||
}
|
||||
|
|
@ -215,7 +215,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testWrongReferenceBaseBugFix() throws Exception {
|
||||
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"
|
||||
+ " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input2.vcf") + " -o %s --no_cmdline_in_header");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("04fa9d6084e4b3a7ea2ebe5675561e2f"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("75d81c247bef5e394b4a2d4126aee2b3"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testWrongReferenceBaseBugFix",spec);
|
||||
|
||||
|
|
@ -224,7 +224,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBasepairResolutionInput() throws Exception {
|
||||
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d3b3f0c8d8f8ef09475306a7814ddd38"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("994ff5c219c683635eadc1624cbbda74"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBasepairResolutionInput", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("6483df1dfa3a5290ba2dc10cc8e15370"));
|
||||
Arrays.asList("91d0df8d9d57e573c32064b83510eb3e"));
|
||||
executeTest("testUpdatePGT", spec);
|
||||
}
|
||||
|
||||
|
|
@ -91,7 +91,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf -A StrandAlleleCountsBySample", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("4cbc86b7a44cf347eb543558de7b2ee6"));
|
||||
Arrays.asList("88fa4a021e4aac9a0e48bd54b2949ece"));
|
||||
executeTest("testUpdatePGT, adding StrandAlleleCountsBySample annotation", spec);
|
||||
}
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("d873327b474fa341cee7823a35efda89"));
|
||||
Arrays.asList("56aef2d8c7e051cd8fa351474cd5ec10"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -115,7 +115,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("f5b3c4b4b45f7d3bc4a38ff5ac7076f0"));
|
||||
Arrays.asList("599394c205c1d6641b9bebabbd29e13c"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -127,7 +127,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("1f4632023ac646d7d04f65d797109f91"));
|
||||
Arrays.asList("f7d5344a85e6d7fc2437d4253b424cb0"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -139,7 +139,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("70376c02babc75c15a1e9a6be47e34fa"));
|
||||
Arrays.asList("c9e4d1e52ee1f3a5233f1fb100f24d5e"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||
}
|
||||
|
||||
|
|
@ -152,7 +152,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("8c814998059fda80cf5a18242db13c19"));
|
||||
Arrays.asList("0fc91d30c7de29c3507c8b9f9902c4f0"));
|
||||
executeTest("combineSingleSamplePipelineGVCFHierarchical", spec);
|
||||
}
|
||||
|
||||
|
|
@ -164,7 +164,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("114a75003083cbe1a9966cc489d441af"));
|
||||
Arrays.asList("f23c9d62542a69b5cbf0e9f89fdd235d"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
|
|
@ -174,7 +174,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf",
|
||||
1,
|
||||
Arrays.asList("364043ee77d4c6dfe1403a90b4938a65"));
|
||||
Arrays.asList("97da49ef380f4b290095d6d2ac760393"));
|
||||
executeTest("testJustOneSample", spec);
|
||||
}
|
||||
|
||||
|
|
@ -185,23 +185,24 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||
1,
|
||||
Arrays.asList("3fc58414196213bc3a85237b055b7883"));
|
||||
Arrays.asList("654c4220c579e66e8b5e681267a3d795"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testNoPLsException() {
|
||||
// Test with input files with (1) 0/0 and (2) ./.
|
||||
final String md5 = "d04b32cf2fa97d303ff7fdc779a653d4";
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
|
||||
1,
|
||||
Arrays.asList("08e4b839dede1b91ce6bd89c66ff063c"));
|
||||
Arrays.asList(md5));
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.nocall.vcf",
|
||||
1,
|
||||
Arrays.asList("08e4b839dede1b91ce6bd89c66ff063c"));
|
||||
Arrays.asList(md5));
|
||||
executeTest("testNoPLsException.1", spec1);
|
||||
executeTest("testNoPLsException.2", spec2);
|
||||
}
|
||||
|
|
@ -211,7 +212,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-nda"),
|
||||
1,
|
||||
Arrays.asList("6bfc0d58eed9fc98642f36a09b1a235d"));
|
||||
Arrays.asList("48636a1aa2cc38d5c22a28b68106433a"));
|
||||
executeTest("testNDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -220,7 +221,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-maxAltAlleles 1"),
|
||||
1,
|
||||
Arrays.asList("1e238c736e3f43e3693327a89455faaa"));
|
||||
Arrays.asList("2bd7c520fb94f7f776973ad40b592a1a"));
|
||||
executeTest("testMaxAltAlleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -229,7 +230,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-stand_call_conf 300 -stand_emit_conf 100"),
|
||||
1,
|
||||
Arrays.asList("9c192402a005216649ff44a36cc7c45c"));
|
||||
Arrays.asList("aa483875c5d36fd0115bb7e88f2ae5a7"));
|
||||
executeTest("testStandardConf", spec);
|
||||
}
|
||||
|
||||
|
|
@ -273,7 +274,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:combined2 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
|
||||
" --uniquifySamples", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("ef6a96d57434bb63935fa7d9f012da9f"));
|
||||
Arrays.asList("c285b05d52b2593ac22c2a123c7cc028"));
|
||||
executeTest("testUniquifiedSamples", spec);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,6 +109,7 @@ public final class GATKVCFConstants {
|
|||
public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT";
|
||||
public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID";
|
||||
public static final String PHRED_SCALED_POSTERIORS_KEY = "PP"; //FamilyLikelihoodsUtils / PosteriorLikelihoodsUtils
|
||||
public static final String REFERENCE_GENOTYPE_QUALITY = "RGQ";
|
||||
public static final String STRAND_COUNT_BY_SAMPLE_KEY = "SAC";
|
||||
public static final String STRAND_BIAS_BY_SAMPLE_KEY = "SB";
|
||||
public final static String TRANSMISSION_PROBABILITY_KEY = "TP"; //PhaseByTransmission
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ public class GATKVCFHeaderLines {
|
|||
addFormatLine(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_ID_KEY, 1, VCFHeaderLineType.String, "Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"));
|
||||
addFormatLine(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
|
||||
addFormatLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_KEY, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
|
||||
addFormatLine(new VCFFormatHeaderLine(REFERENCE_GENOTYPE_QUALITY, 1, VCFHeaderLineType.Integer, "Unconditional reference genotype confidence, encoded as a phred quality -10*log10 p(genotype call is wrong)"));
|
||||
addFormatLine(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_KEY, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct"));
|
||||
addFormatLine(new VCFFormatHeaderLine(RBP_HAPLOTYPE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Read-backed phasing haplotype identifiers"));
|
||||
addFormatLine(new VCFFormatHeaderLine(AVG_INTERVAL_DP_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Float, "Average sample depth across the interval. Sum of the sample specific depth in all loci divided by interval size."));
|
||||
|
|
|
|||
Loading…
Reference in New Issue