Fix for the VQSR visualization script with the new ordering of annotations.
This commit is contained in:
parent
08a7ef6620
commit
a46f633bd6
|
|
@ -77,7 +77,7 @@ public class GaussianMixtureModel {
|
||||||
public GaussianMixtureModel( final int numGaussians, final int numAnnotations,
|
public GaussianMixtureModel( final int numGaussians, final int numAnnotations,
|
||||||
final double shrinkage, final double dirichletParameter, final double priorCounts ) {
|
final double shrinkage, final double dirichletParameter, final double priorCounts ) {
|
||||||
|
|
||||||
gaussians = new ArrayList<MultivariateGaussian>( numGaussians );
|
gaussians = new ArrayList<>( numGaussians );
|
||||||
for( int iii = 0; iii < numGaussians; iii++ ) {
|
for( int iii = 0; iii < numGaussians; iii++ ) {
|
||||||
final MultivariateGaussian gaussian = new MultivariateGaussian( numAnnotations );
|
final MultivariateGaussian gaussian = new MultivariateGaussian( numAnnotations );
|
||||||
gaussians.add( gaussian );
|
gaussians.add( gaussian );
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ public class MultivariateGaussian {
|
||||||
public MultivariateGaussian( final int numAnnotations ) {
|
public MultivariateGaussian( final int numAnnotations ) {
|
||||||
mu = new double[numAnnotations];
|
mu = new double[numAnnotations];
|
||||||
sigma = new Matrix(numAnnotations, numAnnotations);
|
sigma = new Matrix(numAnnotations, numAnnotations);
|
||||||
pVarInGaussian = new ExpandingArrayList<Double>();
|
pVarInGaussian = new ExpandingArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void zeroOutMu() {
|
public void zeroOutMu() {
|
||||||
|
|
|
||||||
|
|
@ -215,6 +215,10 @@ public class VariantDataManager {
|
||||||
trainingSets.add( trainingSet );
|
trainingSets.add( trainingSet );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the {@code annotationKeys} member as-is; no copy is made, so callers receive the same list instance held here. */
public List<String> getAnnotationKeys() {
|
||||||
|
return annotationKeys;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean checkHasTrainingSet() {
|
public boolean checkHasTrainingSet() {
|
||||||
for( final TrainingSet trainingSet : trainingSets ) {
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
if( trainingSet.isTraining ) { return true; }
|
if( trainingSet.isTraining ) { return true; }
|
||||||
|
|
|
||||||
|
|
@ -367,7 +367,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
dataManager.writeOutRecalibrationTable( recalWriter );
|
dataManager.writeOutRecalibrationTable( recalWriter );
|
||||||
if( RSCRIPT_FILE != null ) {
|
if( RSCRIPT_FILE != null ) {
|
||||||
logger.info( "Writing out visualization Rscript file...");
|
logger.info( "Writing out visualization Rscript file...");
|
||||||
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff );
|
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.length]) );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute the RScript command to plot the table of truth values
|
// Execute the RScript command to plot the table of truth values
|
||||||
|
|
@ -379,7 +379,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
executor.exec();
|
executor.exec();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void createVisualizationScript( final ExpandingArrayList<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff ) {
|
private void createVisualizationScript( final ExpandingArrayList<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff, final String[] annotationKeys ) {
|
||||||
PrintStream stream;
|
PrintStream stream;
|
||||||
try {
|
try {
|
||||||
stream = new PrintStream(RSCRIPT_FILE);
|
stream = new PrintStream(RSCRIPT_FILE);
|
||||||
|
|
@ -399,9 +399,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
stream.println("outputPDF <- \"" + RSCRIPT_FILE + ".pdf\"");
|
stream.println("outputPDF <- \"" + RSCRIPT_FILE + ".pdf\"");
|
||||||
stream.println("pdf(outputPDF)"); // Unfortunately this is a huge pdf file, BUGBUG: need to work on reducing the file size
|
stream.println("pdf(outputPDF)"); // Unfortunately this is a huge pdf file, BUGBUG: need to work on reducing the file size
|
||||||
|
|
||||||
for(int iii = 0; iii < USE_ANNOTATIONS.length; iii++) {
|
for(int iii = 0; iii < annotationKeys.length; iii++) {
|
||||||
for( int jjj = iii + 1; jjj < USE_ANNOTATIONS.length; jjj++) {
|
for( int jjj = iii + 1; jjj < annotationKeys.length; jjj++) {
|
||||||
logger.info( "Building " + USE_ANNOTATIONS[iii] + " x " + USE_ANNOTATIONS[jjj] + " plot...");
|
logger.info( "Building " + annotationKeys[iii] + " x " + annotationKeys[jjj] + " plot...");
|
||||||
|
|
||||||
final ExpandingArrayList<VariantDatum> fakeData = new ExpandingArrayList<VariantDatum>();
|
final ExpandingArrayList<VariantDatum> fakeData = new ExpandingArrayList<VariantDatum>();
|
||||||
double minAnn1 = 100.0, maxAnn1 = -100.0, minAnn2 = 100.0, maxAnn2 = -100.0;
|
double minAnn1 = 100.0, maxAnn1 = -100.0, minAnn2 = 100.0, maxAnn2 = -100.0;
|
||||||
|
|
@ -454,8 +454,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
stream.println("NA,NA,NA,NA,1)");
|
stream.println("NA,NA,NA,NA,1)");
|
||||||
stream.println("d <- matrix(data,ncol=5,byrow=T)");
|
stream.println("d <- matrix(data,ncol=5,byrow=T)");
|
||||||
|
|
||||||
final String surfaceFrame = "sf." + USE_ANNOTATIONS[iii] + "." + USE_ANNOTATIONS[jjj];
|
final String surfaceFrame = "sf." + annotationKeys[iii] + "." + annotationKeys[jjj];
|
||||||
final String dataFrame = "df." + USE_ANNOTATIONS[iii] + "." + USE_ANNOTATIONS[jjj];
|
final String dataFrame = "df." + annotationKeys[iii] + "." + annotationKeys[jjj];
|
||||||
|
|
||||||
stream.println(surfaceFrame + " <- data.frame(x=s[,1], y=s[,2], lod=s[,3])");
|
stream.println(surfaceFrame + " <- data.frame(x=s[,1], y=s[,2], lod=s[,3])");
|
||||||
stream.println(dataFrame + " <- data.frame(x=d[,1], y=d[,2], retained=d[,3], training=d[,4], novelty=d[,5])");
|
stream.println(dataFrame + " <- data.frame(x=d[,1], y=d[,2], retained=d[,3], training=d[,4], novelty=d[,5])");
|
||||||
|
|
@ -463,16 +463,16 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
stream.println("dummyData$x <- NaN");
|
stream.println("dummyData$x <- NaN");
|
||||||
stream.println("dummyData$y <- NaN");
|
stream.println("dummyData$y <- NaN");
|
||||||
stream.println("p <- ggplot(data=" + surfaceFrame + ", aes(x=x, y=y)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
stream.println("p <- ggplot(data=" + surfaceFrame + ", aes(x=x, y=y)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||||
stream.println("p1 = p + opts(title=\"model PDF\") + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\")");
|
stream.println("p1 = p + opts(title=\"model PDF\") + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\")");
|
||||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=retained, alpha=I(1/7),legend=FALSE) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=retained, alpha=I(1/7),legend=FALSE) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||||
stream.println("q <- geom_point(aes(x=x,y=y,color=retained),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
stream.println("q <- geom_point(aes(x=x,y=y,color=retained),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||||
stream.println("p2 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),labels=c(\"filtered\",\"retained\"))");
|
stream.println("p2 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),labels=c(\"filtered\",\"retained\"))");
|
||||||
stream.println("p <- qplot(x,y,data="+ dataFrame + "["+dataFrame+"$training != 0,], color=training, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
stream.println("p <- qplot(x,y,data="+ dataFrame + "["+dataFrame+"$training != 0,], color=training, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||||
stream.println("q <- geom_point(aes(x=x,y=y,color=training),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
stream.println("q <- geom_point(aes(x=x,y=y,color=training),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||||
stream.println("p3 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1), labels=c(\"neg\", \"pos\"))");
|
stream.println("p3 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1), labels=c(\"neg\", \"pos\"))");
|
||||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=novelty, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=novelty, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||||
stream.println("q <- geom_point(aes(x=x,y=y,color=novelty),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
stream.println("q <- geom_point(aes(x=x,y=y,color=novelty),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||||
stream.println("p4 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1), labels=c(\"novel\",\"known\"))");
|
stream.println("p4 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1), labels=c(\"novel\",\"known\"))");
|
||||||
stream.println("arrange(p1, p2, p3, p4, ncol=2)");
|
stream.println("arrange(p1, p2, p3, p4, ncol=2)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue