add annotation mismatch warning and refactor tests
This commit is contained in:
parent
f5d133df87
commit
68bdb93c8c
|
|
@ -279,11 +279,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
* model Gaussians can be subset by the value in the "Gaussian" column if desired.
|
* model Gaussians can be subset by the value in the "Gaussian" column if desired.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="output_model", shortName = "outputModel", doc="If specified, the variant recalibrator will output the VQSR model fit to the file specified by -modelFile or to stdout", required=false)
|
@Argument(fullName="output_model", shortName = "outputModel", doc="If specified, the variant recalibrator will output the VQSR model fit to the file specified by -modelFile or to stdout", required=false)
|
||||||
private boolean outputModel = false;
|
private String outputModel = null;
|
||||||
@Argument(fullName="input_model", shortName = "inputModel", doc="If specified, the variant recalibrator will read the VQSR model from this file path.", required=false)
|
@Argument(fullName="input_model", shortName = "inputModel", doc="If specified, the variant recalibrator will read the VQSR model from this file path.", required=false)
|
||||||
private String inputModel = "";
|
private String inputModel = "";
|
||||||
@Output(fullName="model_file", shortName = "modelFile", doc="A GATKReport containing the positive and negative model fits", required=false)
|
//@Output(fullName="model_file", shortName = "modelFile", doc="A GATKReport containing the positive and negative model fits", required=false)
|
||||||
protected PrintStream modelReport = null;
|
//protected PrintStream modelReport = null;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="replicate", shortName="replicate", doc="Used to debug the random number generation inside the VQSR. Do not use.", required=false)
|
@Argument(fullName="replicate", shortName="replicate", doc="Used to debug the random number generation inside the VQSR. Do not use.", required=false)
|
||||||
|
|
@ -370,7 +370,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
final GATKReportTable pmcTable = reportIn.getTable("PositiveModelCovariances");
|
final GATKReportTable pmcTable = reportIn.getTable("PositiveModelCovariances");
|
||||||
final GATKReportTable pmmTable = reportIn.getTable("PositiveModelMeans");
|
final GATKReportTable pmmTable = reportIn.getTable("PositiveModelMeans");
|
||||||
final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
|
final GATKReportTable pPMixTable = reportIn.getTable("GoodGaussianPMix");
|
||||||
final int numAnnotations = dataManager.getMeanVector().length;
|
final int numAnnotations = dataManager.annotationKeys.size();
|
||||||
|
|
||||||
|
if( numAnnotations != pmmTable.getNumColumns()-1 || numAnnotations != nmmTable.getNumColumns()-1 ) { // -1 because the first column is the gaussian number.
|
||||||
|
throw new UserException.CommandLineException( "Annotations specified on the command line do not match annotations in the model report." );
|
||||||
|
}
|
||||||
|
|
||||||
goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations);
|
goodModel = GMMFromTables(pmmTable, pmcTable, pPMixTable, numAnnotations);
|
||||||
badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations);
|
badModel = GMMFromTables(nmmTable, nmcTable, nPMixTable, numAnnotations);
|
||||||
|
|
@ -534,9 +538,13 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
|
dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
|
||||||
engine.evaluateData(dataManager.getData(), badModel, true);
|
engine.evaluateData(dataManager.getData(), badModel, true);
|
||||||
|
|
||||||
if (outputModel) {
|
if (outputModel != null) {
|
||||||
GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
|
try (PrintStream modelReporter = new PrintStream(outputModel)) {
|
||||||
report.print(modelReport);
|
GATKReport report = writeModelReport(goodModel, badModel, USE_ANNOTATIONS);
|
||||||
|
report.print(modelReporter);
|
||||||
|
} catch (FileNotFoundException e){
|
||||||
|
throw new UserException("Could not open output model file:" + outputModel);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
|
engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
|
||||||
|
|
|
||||||
|
|
@ -68,17 +68,14 @@ import java.util.Random;
|
||||||
public class VariantRecalibratorModelOutputUnitTest {
|
public class VariantRecalibratorModelOutputUnitTest {
|
||||||
protected final static Logger logger = Logger.getLogger(VariantRecalibratorModelOutputUnitTest.class);
|
protected final static Logger logger = Logger.getLogger(VariantRecalibratorModelOutputUnitTest.class);
|
||||||
private final boolean printTables = true;
|
private final boolean printTables = true;
|
||||||
|
private final int numAnnotations = 6;
|
||||||
|
private final double shrinkage = 1.0;
|
||||||
|
private final double dirichlet = 0.001;
|
||||||
|
private final double priorCounts = 20.0;
|
||||||
|
private final double epsilon = 1e-6;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testVQSRModelOutput() {
|
public void testVQSRModelOutput() {
|
||||||
final int numAnnotations = 6;
|
|
||||||
final double shrinkage = 1.0;
|
|
||||||
final double dirichlet = 0.001;
|
|
||||||
final double priorCounts = 20.0;
|
|
||||||
final int numGoodGaussians = 2;
|
|
||||||
final int numBadGaussians = 1;
|
|
||||||
final double epsilon = 1e-6;
|
|
||||||
|
|
||||||
Random rand = new Random(12878);
|
Random rand = new Random(12878);
|
||||||
MultivariateGaussian goodGaussian1 = new MultivariateGaussian(numAnnotations);
|
MultivariateGaussian goodGaussian1 = new MultivariateGaussian(numAnnotations);
|
||||||
goodGaussian1.initializeRandomMu(rand);
|
goodGaussian1.initializeRandomMu(rand);
|
||||||
|
|
@ -170,10 +167,53 @@ public class VariantRecalibratorModelOutputUnitTest {
|
||||||
Assert.assertEquals(badGaussian1.sigma.get(i,j), (Double)badSigma.get(i,annotationList.get(j)), epsilon);
|
Assert.assertEquals(badGaussian1.sigma.get(i,j), (Double)badSigma.get(i,annotationList.get(j)), epsilon);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVQSRModelInput(){
|
||||||
|
Random rand = new Random(12878);
|
||||||
|
MultivariateGaussian goodGaussian1 = new MultivariateGaussian(numAnnotations);
|
||||||
|
goodGaussian1.initializeRandomMu(rand);
|
||||||
|
goodGaussian1.initializeRandomSigma(rand);
|
||||||
|
|
||||||
|
MultivariateGaussian goodGaussian2 = new MultivariateGaussian(numAnnotations);
|
||||||
|
goodGaussian2.initializeRandomMu(rand);
|
||||||
|
goodGaussian2.initializeRandomSigma(rand);
|
||||||
|
|
||||||
|
MultivariateGaussian badGaussian1 = new MultivariateGaussian(numAnnotations);
|
||||||
|
badGaussian1.initializeRandomMu(rand);
|
||||||
|
badGaussian1.initializeRandomSigma(rand);
|
||||||
|
|
||||||
|
List<MultivariateGaussian> goodGaussianList = new ArrayList<>();
|
||||||
|
goodGaussianList.add(goodGaussian1);
|
||||||
|
goodGaussianList.add(goodGaussian2);
|
||||||
|
|
||||||
|
List<MultivariateGaussian> badGaussianList = new ArrayList<>();
|
||||||
|
badGaussianList.add(badGaussian1);
|
||||||
|
|
||||||
|
GaussianMixtureModel goodModel = new GaussianMixtureModel(goodGaussianList, shrinkage, dirichlet, priorCounts);
|
||||||
|
GaussianMixtureModel badModel = new GaussianMixtureModel(badGaussianList, shrinkage, dirichlet, priorCounts);
|
||||||
|
|
||||||
|
VariantRecalibrator vqsr = new VariantRecalibrator();
|
||||||
|
List<String> annotationList = new ArrayList<>();
|
||||||
|
annotationList.add("QD");
|
||||||
|
annotationList.add("MQ");
|
||||||
|
annotationList.add("FS");
|
||||||
|
annotationList.add("SOR");
|
||||||
|
annotationList.add("ReadPosRankSum");
|
||||||
|
annotationList.add("MQRankSum");
|
||||||
|
|
||||||
|
GATKReport report = vqsr.writeModelReport(goodModel, badModel, annotationList);
|
||||||
|
|
||||||
// Now test model report reading
|
// Now test model report reading
|
||||||
// Read the gaussian weighting tables
|
// Read all the tables
|
||||||
|
final GATKReportTable badMus = report.getTable("NegativeModelMeans");
|
||||||
|
final GATKReportTable badSigma = report.getTable("NegativeModelCovariances");
|
||||||
final GATKReportTable nPMixTable = report.getTable("BadGaussianPMix");
|
final GATKReportTable nPMixTable = report.getTable("BadGaussianPMix");
|
||||||
|
|
||||||
|
final GATKReportTable goodMus = report.getTable("PositiveModelMeans");
|
||||||
|
final GATKReportTable goodSigma = report.getTable("PositiveModelCovariances");
|
||||||
final GATKReportTable pPMixTable = report.getTable("GoodGaussianPMix");
|
final GATKReportTable pPMixTable = report.getTable("GoodGaussianPMix");
|
||||||
|
|
||||||
GaussianMixtureModel goodModelFromFile = vqsr.GMMFromTables(goodMus, goodSigma, pPMixTable, annotationList.size());
|
GaussianMixtureModel goodModelFromFile = vqsr.GMMFromTables(goodMus, goodSigma, pPMixTable, annotationList.size());
|
||||||
|
|
@ -183,7 +223,6 @@ public class VariantRecalibratorModelOutputUnitTest {
|
||||||
testGMMsForEquality(badModel, badModelFromFile, epsilon);
|
testGMMsForEquality(badModel, badModelFromFile, epsilon);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
//This is tested separately to avoid setting up a VariantDataManager and populating it with fake data
|
//This is tested separately to avoid setting up a VariantDataManager and populating it with fake data
|
||||||
public void testAnnotationNormalizationOutput() {
|
public void testAnnotationNormalizationOutput() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue