Converting annotation values that parse as infinite (Double.POSITIVE_INFINITY / Double.NEGATIVE_INFINITY) into large finite values (+/-10000.0)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2953 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-03-08 14:04:33 +00:00
parent b42e0a398e
commit ca2a0266dc
2 changed files with 18 additions and 7 deletions

View File

@ -68,23 +68,23 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
public final void run( final String outputPrefix ) {
// Create the subset of the data to cluster with
int numSubset = 0;
int numNovel = 0;
for( final VariantDatum datum : dataManager.data ) {
if( !datum.isKnown ) {
numSubset++;
numNovel++;
}
}
VariantDatum[] data;
if( numSubset * 2 * 1.3 < dataManager.numVariants ) {
data = new VariantDatum[numSubset*2];
if( numNovel * 2 * 1.3 < dataManager.numVariants ) {
data = new VariantDatum[numNovel*2];
int iii = 0;
for( final VariantDatum datum : dataManager.data ) {
if( !datum.isKnown ) {
data[iii++] = datum;
}
}
while( iii < numSubset*2 ) { // grab an equal number of known variants at random
while( iii < numNovel*2 ) { // grab an equal number of known variants at random
final VariantDatum datum = dataManager.data[rand.nextInt(dataManager.numVariants)];
if( datum.isKnown ) {
data[iii++] = datum;
@ -94,7 +94,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
data = dataManager.data;
}
System.out.println("Clustering with " + data.length + " variants...");
System.out.println("Clustering with " + numNovel + " novel variants and " + (data.length - numNovel) + " known variants...");
if( data.length == dataManager.numVariants ) { System.out.println(" (used all variants since 2*numNovel is so large compared to the full set) "); }
createClusters( data ); // Using a subset of the data
System.out.println("Printing out cluster parameters...");

View File

@ -57,6 +57,8 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
private boolean IGNORE_INPUT_FILTERS = false;
@Argument(fullName="exclude_annotation", shortName="exclude", doc="The names of the annotations which should be excluded from the calculations", required=false)
private String[] EXCLUDED_ANNOTATIONS = null;
@Argument(fullName="force_annotation", shortName="force", doc="The names of the annotations which should be forced into the calculations even if they aren't present in every variant", required=false)
private String[] FORCED_ANNOTATIONS = null;
@Argument(fullName="output", shortName="output", doc="The output file name", required=false)
private String OUTPUT_FILE = "optimizer.data";
@Argument(fullName="numGaussians", shortName="nG", doc="The number of Gaussians to be used in the Gaussian mixture model", required=false)
@ -71,6 +73,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
private final ExpandingArrayList<String> annotationKeys = new ExpandingArrayList<String>();
private boolean firstVariant = true;
private int numAnnotations = 0;
private static final double INFINITE_ANNOTATION_VALUE = 10000.0;
//---------------------------------------------------------------------------------------------------------------
//
@ -110,6 +113,11 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
if( annotationKeys.contains( excludedAnnotation ) ) { annotationKeys.remove( excludedAnnotation ); }
}
}
if( FORCED_ANNOTATIONS != null ) {
for( final String forcedAnnotation : FORCED_ANNOTATIONS ) {
if( !annotationKeys.contains( forcedAnnotation ) ) { annotationKeys.add( forcedAnnotation ); }
}
}
numAnnotations = annotationKeys.size() + 1; // +1 for variant quality ("QUAL")
annotationValues = new double[numAnnotations];
firstVariant = false;
@ -121,6 +129,9 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
double value = 0.0;
try {
value = Double.parseDouble( (String)vc.getAttribute( key, "0.0" ) );
if( Double.isInfinite(value) ) {
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
}
} catch( NumberFormatException e ) {
// do nothing, default value is 0.0,
}
@ -166,7 +177,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
logger.info( "The annotations are: " + annotationKeys + " and QUAL." );
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]
// Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it
final VariantOptimizationModel gmm = new VariantGaussianMixtureModel( dataManager, TARGET_TITV, NUM_GAUSSIANS, NUM_ITERATIONS );
gmm.run( OUTPUT_FILE );