VariantRecalibrator's VQSR.vcf now contains NEG/POS labels
-- It's useful to know which sites have been used in the training of the model. The recal_file emitted by VR now contains VCF info field annotations labeling each site that was used in the positive or negative training models with POSITIVE_TRAINING_SITE and/or NEGATIVE_TRAINING_SITE -- Update MD5s, which all changed now that the recal file and the resulting applied vcfs all have these pos / neg labels
This commit is contained in:
parent
ee51195bf5
commit
564fe36d22
|
|
@ -200,6 +200,8 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
||||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
|
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
|
||||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
|
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
|
||||||
|
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.POSITIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the positive training set of good variants"));
|
||||||
|
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.NEGATIVE_LABEL_KEY, 1, VCFHeaderLineType.Flag, "This variant was used to build the negative training set of bad variants"));
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -243,6 +245,10 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
||||||
// Annotate the new record with its VQSLOD and the worst performing annotation
|
// Annotate the new record with its VQSLOD and the worst performing annotation
|
||||||
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
|
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
|
||||||
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
|
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
|
||||||
|
if ( recalDatum.hasAttribute(VariantRecalibrator.POSITIVE_LABEL_KEY))
|
||||||
|
builder.attribute(VariantRecalibrator.POSITIVE_LABEL_KEY, true);
|
||||||
|
if ( recalDatum.hasAttribute(VariantRecalibrator.NEGATIVE_LABEL_KEY))
|
||||||
|
builder.attribute(VariantRecalibrator.NEGATIVE_LABEL_KEY, true);
|
||||||
|
|
||||||
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
for( int i = tranches.size() - 1; i >= 0; i-- ) {
|
||||||
final Tranche tranche = tranches.get(i);
|
final Tranche tranche = tranches.get(i);
|
||||||
|
|
|
||||||
|
|
@ -335,19 +335,17 @@ public class VariantDataManager {
|
||||||
}} );
|
}} );
|
||||||
|
|
||||||
// create dummy alleles to be used
|
// create dummy alleles to be used
|
||||||
final List<Allele> alleles = new ArrayList<Allele>(2);
|
final List<Allele> alleles = Arrays.asList(Allele.create("N", true), Allele.create("<VQSR>", false));
|
||||||
alleles.add(Allele.create("N", true));
|
|
||||||
alleles.add(Allele.create("<VQSR>", false));
|
|
||||||
|
|
||||||
// to be used for the important INFO tags
|
|
||||||
final HashMap<String, Object> attributes = new HashMap<String, Object>(3);
|
|
||||||
|
|
||||||
for( final VariantDatum datum : data ) {
|
for( final VariantDatum datum : data ) {
|
||||||
attributes.put(VCFConstants.END_KEY, datum.loc.getStop());
|
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStop(), alleles);
|
||||||
attributes.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
builder.attribute(VCFConstants.END_KEY, datum.loc.getStop());
|
||||||
attributes.put(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
||||||
|
builder.attribute(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
||||||
|
|
||||||
|
if ( datum.atTrainingSite ) builder.attribute(VariantRecalibrator.POSITIVE_LABEL_KEY, true);
|
||||||
|
if ( datum.atAntiTrainingSite ) builder.attribute(VariantRecalibrator.NEGATIVE_LABEL_KEY, true);
|
||||||
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStop(), alleles).attributes(attributes);
|
|
||||||
recalWriter.add(builder.make());
|
recalWriter.add(builder.make());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -135,6 +135,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
|
|
||||||
public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model
|
public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model
|
||||||
public static final String CULPRIT_KEY = "culprit"; // The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out
|
public static final String CULPRIT_KEY = "culprit"; // The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out
|
||||||
|
public static final String NEGATIVE_LABEL_KEY = "NEGATIVE_TRAIN_SITE"; // this variant was used in the negative training set
|
||||||
|
public static final String POSITIVE_LABEL_KEY = "POSITIVE_TRAIN_SITE"; // this variant was used in the positive traning set
|
||||||
private static final String PLOT_TRANCHES_RSCRIPT = "plot_Tranches.R";
|
private static final String PLOT_TRANCHES_RSCRIPT = "plot_Tranches.R";
|
||||||
|
|
||||||
@ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection();
|
@ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection();
|
||||||
|
|
|
||||||
|
|
@ -73,8 +73,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
|
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||||
"4d08c8eee61dd1bdea8c5765f34e41f0", // tranches
|
"4d08c8eee61dd1bdea8c5765f34e41f0", // tranches
|
||||||
"ce396fe4045e020b61471f6737dff36e", // recal file
|
"83756d1058ee3c816edf643148ae20df", // recal file
|
||||||
"4f59bd61be900b25c6ecedaa68b9c8de"); // cut VCF
|
"06353a59fa4857135b5a63ea0791b035"); // cut VCF
|
||||||
|
|
||||||
@DataProvider(name = "VRTest")
|
@DataProvider(name = "VRTest")
|
||||||
public Object[][] createData1() {
|
public Object[][] createData1() {
|
||||||
|
|
@ -122,8 +122,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
|
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
|
||||||
"6a1eef4d02857dbb117a15420b5c0ce9", // tranches
|
"6a1eef4d02857dbb117a15420b5c0ce9", // tranches
|
||||||
"238366af66b05b6d21749e799c25353d", // recal file
|
"ea85f0293e9c016bd1bbe3c2977905d8", // recal file
|
||||||
"3928d6bc5007becf52312ade70f14c42"); // cut VCF
|
"4cab4a11130e2f84bd5fe4f9981811bd"); // cut VCF
|
||||||
|
|
||||||
@DataProvider(name = "VRBCFTest")
|
@DataProvider(name = "VRBCFTest")
|
||||||
public Object[][] createVRBCFTest() {
|
public Object[][] createVRBCFTest() {
|
||||||
|
|
@ -174,14 +174,14 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
VRTest indelUnfiltered = new VRTest(
|
VRTest indelUnfiltered = new VRTest(
|
||||||
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
|
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
|
||||||
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
||||||
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
"6091d44e5c750620c6d5493864eeb160", // recal file
|
||||||
"b2c6827be592c24a4692b1753edc7d23"); // cut VCF
|
"ef4c7931f134c1c860864772d69dd89c"); // cut VCF
|
||||||
|
|
||||||
VRTest indelFiltered = new VRTest(
|
VRTest indelFiltered = new VRTest(
|
||||||
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
|
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
|
||||||
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
"b7589cd098dc153ec64c02dcff2838e4", // tranches
|
||||||
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
"6091d44e5c750620c6d5493864eeb160", // recal file
|
||||||
"5d483fe1ba2ef36ee9e6c14cbd654706"); // cut VCF
|
"f8decee61f409b6041856c5a20e3865d"); // cut VCF
|
||||||
|
|
||||||
@DataProvider(name = "VRIndelTest")
|
@DataProvider(name = "VRIndelTest")
|
||||||
public Object[][] createTestVariantRecalibratorIndel() {
|
public Object[][] createTestVariantRecalibratorIndel() {
|
||||||
|
|
@ -239,7 +239,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
||||||
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
|
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
|
||||||
Arrays.asList("018b3a5cc7cf0cb5468c6a0c80ccaa8b"));
|
Arrays.asList("8d2e886523c050e0ea2952cbbde4cc26"));
|
||||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue