Fix for the VQSR visualization script with the new ordering of annotations.
This commit is contained in:
parent
08a7ef6620
commit
a46f633bd6
|
|
@ -77,7 +77,7 @@ public class GaussianMixtureModel {
|
|||
public GaussianMixtureModel( final int numGaussians, final int numAnnotations,
|
||||
final double shrinkage, final double dirichletParameter, final double priorCounts ) {
|
||||
|
||||
gaussians = new ArrayList<MultivariateGaussian>( numGaussians );
|
||||
gaussians = new ArrayList<>( numGaussians );
|
||||
for( int iii = 0; iii < numGaussians; iii++ ) {
|
||||
final MultivariateGaussian gaussian = new MultivariateGaussian( numAnnotations );
|
||||
gaussians.add( gaussian );
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class MultivariateGaussian {
|
|||
public MultivariateGaussian( final int numAnnotations ) {
|
||||
mu = new double[numAnnotations];
|
||||
sigma = new Matrix(numAnnotations, numAnnotations);
|
||||
pVarInGaussian = new ExpandingArrayList<Double>();
|
||||
pVarInGaussian = new ExpandingArrayList<>();
|
||||
}
|
||||
|
||||
public void zeroOutMu() {
|
||||
|
|
|
|||
|
|
@ -215,6 +215,10 @@ public class VariantDataManager {
|
|||
trainingSets.add( trainingSet );
|
||||
}
|
||||
|
||||
public List<String> getAnnotationKeys() {
|
||||
return annotationKeys;
|
||||
}
|
||||
|
||||
public boolean checkHasTrainingSet() {
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
if( trainingSet.isTraining ) { return true; }
|
||||
|
|
|
|||
|
|
@ -367,7 +367,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
dataManager.writeOutRecalibrationTable( recalWriter );
|
||||
if( RSCRIPT_FILE != null ) {
|
||||
logger.info( "Writing out visualization Rscript file...");
|
||||
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff );
|
||||
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.length]) );
|
||||
}
|
||||
|
||||
// Execute the RScript command to plot the table of truth values
|
||||
|
|
@ -379,7 +379,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
executor.exec();
|
||||
}
|
||||
|
||||
private void createVisualizationScript( final ExpandingArrayList<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff ) {
|
||||
private void createVisualizationScript( final ExpandingArrayList<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff, final String[] annotationKeys ) {
|
||||
PrintStream stream;
|
||||
try {
|
||||
stream = new PrintStream(RSCRIPT_FILE);
|
||||
|
|
@ -399,9 +399,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
stream.println("outputPDF <- \"" + RSCRIPT_FILE + ".pdf\"");
|
||||
stream.println("pdf(outputPDF)"); // Unfortunately this is a huge pdf file, BUGBUG: need to work on reducing the file size
|
||||
|
||||
for(int iii = 0; iii < USE_ANNOTATIONS.length; iii++) {
|
||||
for( int jjj = iii + 1; jjj < USE_ANNOTATIONS.length; jjj++) {
|
||||
logger.info( "Building " + USE_ANNOTATIONS[iii] + " x " + USE_ANNOTATIONS[jjj] + " plot...");
|
||||
for(int iii = 0; iii < annotationKeys.length; iii++) {
|
||||
for( int jjj = iii + 1; jjj < annotationKeys.length; jjj++) {
|
||||
logger.info( "Building " + annotationKeys[iii] + " x " + annotationKeys[jjj] + " plot...");
|
||||
|
||||
final ExpandingArrayList<VariantDatum> fakeData = new ExpandingArrayList<VariantDatum>();
|
||||
double minAnn1 = 100.0, maxAnn1 = -100.0, minAnn2 = 100.0, maxAnn2 = -100.0;
|
||||
|
|
@ -454,8 +454,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
stream.println("NA,NA,NA,NA,1)");
|
||||
stream.println("d <- matrix(data,ncol=5,byrow=T)");
|
||||
|
||||
final String surfaceFrame = "sf." + USE_ANNOTATIONS[iii] + "." + USE_ANNOTATIONS[jjj];
|
||||
final String dataFrame = "df." + USE_ANNOTATIONS[iii] + "." + USE_ANNOTATIONS[jjj];
|
||||
final String surfaceFrame = "sf." + annotationKeys[iii] + "." + annotationKeys[jjj];
|
||||
final String dataFrame = "df." + annotationKeys[iii] + "." + annotationKeys[jjj];
|
||||
|
||||
stream.println(surfaceFrame + " <- data.frame(x=s[,1], y=s[,2], lod=s[,3])");
|
||||
stream.println(dataFrame + " <- data.frame(x=d[,1], y=d[,2], retained=d[,3], training=d[,4], novelty=d[,5])");
|
||||
|
|
@ -463,16 +463,16 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
stream.println("dummyData$x <- NaN");
|
||||
stream.println("dummyData$y <- NaN");
|
||||
stream.println("p <- ggplot(data=" + surfaceFrame + ", aes(x=x, y=y)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("p1 = p + opts(title=\"model PDF\") + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\")");
|
||||
stream.println("p1 = p + opts(title=\"model PDF\") + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\")");
|
||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=retained, alpha=I(1/7),legend=FALSE) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("q <- geom_point(aes(x=x,y=y,color=retained),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||
stream.println("p2 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),labels=c(\"filtered\",\"retained\"))");
|
||||
stream.println("p2 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),labels=c(\"filtered\",\"retained\"))");
|
||||
stream.println("p <- qplot(x,y,data="+ dataFrame + "["+dataFrame+"$training != 0,], color=training, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("q <- geom_point(aes(x=x,y=y,color=training),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||
stream.println("p3 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1), labels=c(\"neg\", \"pos\"))");
|
||||
stream.println("p3 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1), labels=c(\"neg\", \"pos\"))");
|
||||
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=novelty, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
|
||||
stream.println("q <- geom_point(aes(x=x,y=y,color=novelty),data=dummyData, alpha=1.0, na.rm=TRUE)");
|
||||
stream.println("p4 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1), labels=c(\"novel\",\"known\"))");
|
||||
stream.println("p4 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1), labels=c(\"novel\",\"known\"))");
|
||||
stream.println("arrange(p1, p2, p3, p4, ncol=2)");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue