Fix for the VQSR visualization script with the new ordering of annotations.

This commit is contained in:
Ryan Poplin 2013-08-02 19:10:45 -04:00
parent 08a7ef6620
commit a46f633bd6
4 changed files with 17 additions and 13 deletions

View File

@ -77,7 +77,7 @@ public class GaussianMixtureModel {
public GaussianMixtureModel( final int numGaussians, final int numAnnotations,
final double shrinkage, final double dirichletParameter, final double priorCounts ) {
gaussians = new ArrayList<MultivariateGaussian>( numGaussians );
gaussians = new ArrayList<>( numGaussians );
for( int iii = 0; iii < numGaussians; iii++ ) {
final MultivariateGaussian gaussian = new MultivariateGaussian( numAnnotations );
gaussians.add( gaussian );

View File

@ -77,7 +77,7 @@ public class MultivariateGaussian {
public MultivariateGaussian( final int numAnnotations ) {
mu = new double[numAnnotations];
sigma = new Matrix(numAnnotations, numAnnotations);
pVarInGaussian = new ExpandingArrayList<Double>();
pVarInGaussian = new ExpandingArrayList<>();
}
public void zeroOutMu() {

View File

@ -215,6 +215,10 @@ public class VariantDataManager {
trainingSets.add( trainingSet );
}
public List<String> getAnnotationKeys() {
return annotationKeys;
}
public boolean checkHasTrainingSet() {
for( final TrainingSet trainingSet : trainingSets ) {
if( trainingSet.isTraining ) { return true; }

View File

@ -367,7 +367,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
dataManager.writeOutRecalibrationTable( recalWriter );
if( RSCRIPT_FILE != null ) {
logger.info( "Writing out visualization Rscript file...");
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff );
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.length]) );
}
// Execute the RScript command to plot the table of truth values
@ -379,7 +379,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
executor.exec();
}
private void createVisualizationScript( final ExpandingArrayList<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff ) {
private void createVisualizationScript( final ExpandingArrayList<VariantDatum> randomData, final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, final double lodCutoff, final String[] annotationKeys ) {
PrintStream stream;
try {
stream = new PrintStream(RSCRIPT_FILE);
@ -399,9 +399,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
stream.println("outputPDF <- \"" + RSCRIPT_FILE + ".pdf\"");
stream.println("pdf(outputPDF)"); // Unfortunately this is a huge pdf file, BUGBUG: need to work on reducing the file size
for(int iii = 0; iii < USE_ANNOTATIONS.length; iii++) {
for( int jjj = iii + 1; jjj < USE_ANNOTATIONS.length; jjj++) {
logger.info( "Building " + USE_ANNOTATIONS[iii] + " x " + USE_ANNOTATIONS[jjj] + " plot...");
for(int iii = 0; iii < annotationKeys.length; iii++) {
for( int jjj = iii + 1; jjj < annotationKeys.length; jjj++) {
logger.info( "Building " + annotationKeys[iii] + " x " + annotationKeys[jjj] + " plot...");
final ExpandingArrayList<VariantDatum> fakeData = new ExpandingArrayList<VariantDatum>();
double minAnn1 = 100.0, maxAnn1 = -100.0, minAnn2 = 100.0, maxAnn2 = -100.0;
@ -454,8 +454,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
stream.println("NA,NA,NA,NA,1)");
stream.println("d <- matrix(data,ncol=5,byrow=T)");
final String surfaceFrame = "sf." + USE_ANNOTATIONS[iii] + "." + USE_ANNOTATIONS[jjj];
final String dataFrame = "df." + USE_ANNOTATIONS[iii] + "." + USE_ANNOTATIONS[jjj];
final String surfaceFrame = "sf." + annotationKeys[iii] + "." + annotationKeys[jjj];
final String dataFrame = "df." + annotationKeys[iii] + "." + annotationKeys[jjj];
stream.println(surfaceFrame + " <- data.frame(x=s[,1], y=s[,2], lod=s[,3])");
stream.println(dataFrame + " <- data.frame(x=d[,1], y=d[,2], retained=d[,3], training=d[,4], novelty=d[,5])");
@ -463,16 +463,16 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
stream.println("dummyData$x <- NaN");
stream.println("dummyData$y <- NaN");
stream.println("p <- ggplot(data=" + surfaceFrame + ", aes(x=x, y=y)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
stream.println("p1 = p + opts(title=\"model PDF\") + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\")");
stream.println("p1 = p + opts(title=\"model PDF\") + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + geom_tile(aes(fill = lod)) + scale_fill_gradient(high=\"green\", low=\"red\")");
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=retained, alpha=I(1/7),legend=FALSE) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
stream.println("q <- geom_point(aes(x=x,y=y,color=retained),data=dummyData, alpha=1.0, na.rm=TRUE)");
stream.println("p2 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),labels=c(\"filtered\",\"retained\"))");
stream.println("p2 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"outcome\", high=\"black\", low=\"red\",breaks=c(-1,1),labels=c(\"filtered\",\"retained\"))");
stream.println("p <- qplot(x,y,data="+ dataFrame + "["+dataFrame+"$training != 0,], color=training, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
stream.println("q <- geom_point(aes(x=x,y=y,color=training),data=dummyData, alpha=1.0, na.rm=TRUE)");
stream.println("p3 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1), labels=c(\"neg\", \"pos\"))");
stream.println("p3 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(high=\"green\", low=\"purple\",breaks=c(-1,1), labels=c(\"neg\", \"pos\"))");
stream.println("p <- qplot(x,y,data=" + dataFrame + ", color=novelty, alpha=I(1/7)) + opts(panel.background = theme_rect(colour = NA), panel.grid.minor = theme_line(colour = NA), panel.grid.major = theme_line(colour = NA))");
stream.println("q <- geom_point(aes(x=x,y=y,color=novelty),data=dummyData, alpha=1.0, na.rm=TRUE)");
stream.println("p4 = p + q + labs(x=\""+ USE_ANNOTATIONS[iii] +"\", y=\""+ USE_ANNOTATIONS[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1), labels=c(\"novel\",\"known\"))");
stream.println("p4 = p + q + labs(x=\""+ annotationKeys[iii] +"\", y=\""+ annotationKeys[jjj] +"\") + scale_colour_gradient(name=\"novelty\", high=\"blue\", low=\"red\",breaks=c(-1,1), labels=c(\"novel\",\"known\"))");
stream.println("arrange(p1, p2, p3, p4, ncol=2)");
}
}