Improvements to the VariantRecalibrator R plots
-- VariantRecalibrator now emits plots with denormlized values (original values) instead of their normalized (x - mu / sigma) which helps to understand the distribution of values that are good and bad
This commit is contained in:
parent
564fe36d22
commit
5a74a3190c
|
|
@ -127,30 +127,46 @@ public class VariantDataManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addTrainingSet( final TrainingSet trainingSet ) {
|
/**
|
||||||
trainingSets.add( trainingSet );
|
* Convert a normalized point to it's original annotation value
|
||||||
}
|
*
|
||||||
|
* norm = (orig - mu) / sigma
|
||||||
|
* orig = norm * sigma + mu
|
||||||
|
*
|
||||||
|
* @param normalizedValue the normalized value of the ith annotation
|
||||||
|
* @param annI the index of the annotation value
|
||||||
|
* @return the denormalized value for the annotation
|
||||||
|
*/
|
||||||
|
public double denormalizeDatum(final double normalizedValue, final int annI) {
|
||||||
|
final double mu = meanVector[annI];
|
||||||
|
final double sigma = varianceVector[annI];
|
||||||
|
return normalizedValue * sigma + mu;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean checkHasTrainingSet() {
|
public void addTrainingSet( final TrainingSet trainingSet ) {
|
||||||
for( final TrainingSet trainingSet : trainingSets ) {
|
trainingSets.add( trainingSet );
|
||||||
if( trainingSet.isTraining ) { return true; }
|
}
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean checkHasTruthSet() {
|
public boolean checkHasTrainingSet() {
|
||||||
for( final TrainingSet trainingSet : trainingSets ) {
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
if( trainingSet.isTruth ) { return true; }
|
if( trainingSet.isTraining ) { return true; }
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean checkHasKnownSet() {
|
public boolean checkHasTruthSet() {
|
||||||
for( final TrainingSet trainingSet : trainingSets ) {
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
if( trainingSet.isKnown ) { return true; }
|
if( trainingSet.isTruth ) { return true; }
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean checkHasKnownSet() {
|
||||||
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
|
if( trainingSet.isKnown ) { return true; }
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
public ExpandingArrayList<VariantDatum> getTrainingData() {
|
public ExpandingArrayList<VariantDatum> getTrainingData() {
|
||||||
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
|
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
|
||||||
|
|
@ -260,7 +276,7 @@ public class VariantDataManager {
|
||||||
value = vc.getAttributeAsDouble( annotationKey, Double.NaN );
|
value = vc.getAttributeAsDouble( annotationKey, Double.NaN );
|
||||||
if( Double.isInfinite(value) ) { value = Double.NaN; }
|
if( Double.isInfinite(value) ) { value = Double.NaN; }
|
||||||
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
|
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
|
||||||
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
||||||
}
|
}
|
||||||
|
|
||||||
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
||||||
|
|
@ -297,7 +313,7 @@ public class VariantDataManager {
|
||||||
|
|
||||||
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
|
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
|
||||||
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
|
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
|
||||||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
|
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
|
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
|
||||||
|
|
|
||||||
|
|
@ -435,14 +435,20 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
|
|
||||||
stream.print("surface <- c(");
|
stream.print("surface <- c(");
|
||||||
for( final VariantDatum datum : fakeData ) {
|
for( final VariantDatum datum : fakeData ) {
|
||||||
stream.print(String.format("%.3f, %.3f, %.3f, ", datum.annotations[iii], datum.annotations[jjj], Math.min(4.0, Math.max(-4.0, datum.lod))));
|
stream.print(String.format("%.3f, %.3f, %.3f, ",
|
||||||
|
dataManager.denormalizeDatum(datum.annotations[iii], iii),
|
||||||
|
dataManager.denormalizeDatum(datum.annotations[jjj], jjj),
|
||||||
|
Math.min(4.0, Math.max(-4.0, datum.lod))));
|
||||||
}
|
}
|
||||||
stream.println("NA,NA,NA)");
|
stream.println("NA,NA,NA)");
|
||||||
stream.println("s <- matrix(surface,ncol=3,byrow=T)");
|
stream.println("s <- matrix(surface,ncol=3,byrow=T)");
|
||||||
|
|
||||||
stream.print("data <- c(");
|
stream.print("data <- c(");
|
||||||
for( final VariantDatum datum : randomData ) {
|
for( final VariantDatum datum : randomData ) {
|
||||||
stream.print(String.format("%.3f, %.3f, %.3f, %d, %d,", datum.annotations[iii], datum.annotations[jjj], (datum.lod < lodCutoff ? -1.0 : 1.0),
|
stream.print(String.format("%.3f, %.3f, %.3f, %d, %d,",
|
||||||
|
dataManager.denormalizeDatum(datum.annotations[iii], iii),
|
||||||
|
dataManager.denormalizeDatum(datum.annotations[jjj], jjj),
|
||||||
|
(datum.lod < lodCutoff ? -1.0 : 1.0),
|
||||||
(datum.atAntiTrainingSite ? -1 : (datum.atTrainingSite ? 1 : 0)), (datum.isKnown ? 1 : -1)));
|
(datum.atAntiTrainingSite ? -1 : (datum.atTrainingSite ? 1 : 0)), (datum.isKnown ? 1 : -1)));
|
||||||
}
|
}
|
||||||
stream.println("NA,NA,NA,NA,1)");
|
stream.println("NA,NA,NA,NA,1)");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue