Converting annotation values that are infinite (Double.POSITIVE_INFINITY or Double.NEGATIVE_INFINITY) to a large finite value (+/-10000.0)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2953 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b42e0a398e
commit
ca2a0266dc
|
|
@ -68,23 +68,23 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
public final void run( final String outputPrefix ) {
|
||||
|
||||
// Create the subset of the data to cluster with
|
||||
int numSubset = 0;
|
||||
int numNovel = 0;
|
||||
for( final VariantDatum datum : dataManager.data ) {
|
||||
if( !datum.isKnown ) {
|
||||
numSubset++;
|
||||
numNovel++;
|
||||
}
|
||||
}
|
||||
VariantDatum[] data;
|
||||
|
||||
if( numSubset * 2 * 1.3 < dataManager.numVariants ) {
|
||||
data = new VariantDatum[numSubset*2];
|
||||
if( numNovel * 2 * 1.3 < dataManager.numVariants ) {
|
||||
data = new VariantDatum[numNovel*2];
|
||||
int iii = 0;
|
||||
for( final VariantDatum datum : dataManager.data ) {
|
||||
if( !datum.isKnown ) {
|
||||
data[iii++] = datum;
|
||||
}
|
||||
}
|
||||
while( iii < numSubset*2 ) { // grab an equal number of known variants at random
|
||||
while( iii < numNovel*2 ) { // grab an equal number of known variants at random
|
||||
final VariantDatum datum = dataManager.data[rand.nextInt(dataManager.numVariants)];
|
||||
if( datum.isKnown ) {
|
||||
data[iii++] = datum;
|
||||
|
|
@ -94,7 +94,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
data = dataManager.data;
|
||||
}
|
||||
|
||||
System.out.println("Clustering with " + data.length + " variants...");
|
||||
System.out.println("Clustering with " + numNovel + " novel variants and " + (data.length - numNovel) + " known variants...");
|
||||
if( data.length == dataManager.numVariants ) { System.out.println(" (used all variants since 2*numNovel is so large compared to the full set) "); }
|
||||
createClusters( data ); // Using a subset of the data
|
||||
System.out.println("Printing out cluster parameters...");
|
||||
|
|
|
|||
|
|
@ -57,6 +57,8 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
private boolean IGNORE_INPUT_FILTERS = false;
|
||||
@Argument(fullName="exclude_annotation", shortName="exclude", doc="The names of the annotations which should be excluded from the calculations", required=false)
|
||||
private String[] EXCLUDED_ANNOTATIONS = null;
|
||||
@Argument(fullName="force_annotation", shortName="force", doc="The names of the annotations which should be forced into the calculations even if they aren't present in every variant", required=false)
|
||||
private String[] FORCED_ANNOTATIONS = null;
|
||||
@Argument(fullName="output", shortName="output", doc="The output file name", required=false)
|
||||
private String OUTPUT_FILE = "optimizer.data";
|
||||
@Argument(fullName="numGaussians", shortName="nG", doc="The number of Gaussians to be used in the Gaussian mixture model", required=false)
|
||||
|
|
@ -71,6 +73,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
private final ExpandingArrayList<String> annotationKeys = new ExpandingArrayList<String>();
|
||||
private boolean firstVariant = true;
|
||||
private int numAnnotations = 0;
|
||||
private static final double INFINITE_ANNOTATION_VALUE = 10000.0;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -110,6 +113,11 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
if( annotationKeys.contains( excludedAnnotation ) ) { annotationKeys.remove( excludedAnnotation ); }
|
||||
}
|
||||
}
|
||||
if( FORCED_ANNOTATIONS != null ) {
|
||||
for( final String forcedAnnotation : FORCED_ANNOTATIONS ) {
|
||||
if( !annotationKeys.contains( forcedAnnotation ) ) { annotationKeys.add( forcedAnnotation ); }
|
||||
}
|
||||
}
|
||||
numAnnotations = annotationKeys.size() + 1; // +1 for variant quality ("QUAL")
|
||||
annotationValues = new double[numAnnotations];
|
||||
firstVariant = false;
|
||||
|
|
@ -121,6 +129,9 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
double value = 0.0;
|
||||
try {
|
||||
value = Double.parseDouble( (String)vc.getAttribute( key, "0.0" ) );
|
||||
if( Double.isInfinite(value) ) {
|
||||
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
|
||||
}
|
||||
} catch( NumberFormatException e ) {
|
||||
// do nothing: value keeps its default of 0.0
|
||||
}
|
||||
|
|
@ -166,7 +177,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
logger.info( "The annotations are: " + annotationKeys + " and QUAL." );
|
||||
|
||||
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]
|
||||
|
||||
|
||||
// Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it
|
||||
final VariantOptimizationModel gmm = new VariantGaussianMixtureModel( dataManager, TARGET_TITV, NUM_GAUSSIANS, NUM_ITERATIONS );
|
||||
gmm.run( OUTPUT_FILE );
|
||||
|
|
|
|||
Loading…
Reference in New Issue