AnalyzeCovariates now uses NestedHashMap instead of NHashMap, giving it the same performance improvements as the recalibrator. The NHashMap class is removed completely.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2483 348d0f76-0448-11de-a6fe-93d51630548a
2009-12-30 18:10:10 +00:00 · 2009-12-30 18:10:10 +00:00 · 80658fd99e
parent 9b2733a54a
commit 80658fd99e
3 changed files with 35 additions and 181 deletions
--- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java
+++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalysisDataManager.java
@ -1,10 +1,9 @@
 package org.broadinstitute.sting.analyzecovariates;

 import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
-import org.broadinstitute.sting.utils.NHashMap;
+import org.broadinstitute.sting.utils.NestedHashMap;

 import java.util.ArrayList;
-import java.util.List;

 /**
 * Created by IntelliJ IDEA.
@ -17,17 +16,17 @@ import java.util.List;

 public class AnalysisDataManager {
    
-    private NHashMap<RecalDatum> dataCollapsedReadGroup; // Table where everything except read group has been collapsed
-    private ArrayList<NHashMap<RecalDatum>> dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed
+    private NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
+    private ArrayList<NestedHashMap> dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed

    AnalysisDataManager() {
    }

    AnalysisDataManager( final int numCovariates ) {
-        dataCollapsedReadGroup = new NHashMap<RecalDatum>();
-        dataCollapsedByCovariate = new ArrayList<NHashMap<RecalDatum>>();
+        dataCollapsedReadGroup = new NestedHashMap();
+        dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
        for( int iii = 0; iii < numCovariates - 1; iii++ ) { // readGroup isn't counted here, its table is separate
-            dataCollapsedByCovariate.add( new NHashMap<RecalDatum>() );
+            dataCollapsedByCovariate.add( new NestedHashMap() );
        }
    }

@ -37,35 +36,34 @@ public class AnalysisDataManager {
     * @param fullDatum The RecalDatum which is the data for this mapping
     * @param IGNORE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
     */
-    public final void addToAllTables( final List<? extends Comparable> key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {
+    public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {

-        int qscore = Integer.parseInt( key.get(1).toString() );
-        ArrayList<Comparable> newKey;
+        int qscore = Integer.parseInt( key[1].toString() );
        RecalDatum collapsedDatum;
+        final Object[] readGroupCollapsedKey = new Object[1];
+        final Object[] covariateCollapsedKey = new Object[2];

        if( !(qscore < IGNORE_QSCORES_LESS_THAN) ) {
            // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
-            newKey = new ArrayList<Comparable>();
-            newKey.add( key.get(0) ); // Make a new key with just the read group
-            collapsedDatum = dataCollapsedReadGroup.get( newKey );
+            readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
+            collapsedDatum = (RecalDatum)dataCollapsedReadGroup.get( readGroupCollapsedKey );
            if( collapsedDatum == null ) {
-                dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
+                dataCollapsedReadGroup.put( new RecalDatum(fullDatum), readGroupCollapsedKey );
            } else {
                collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported
            }
        }

-        // Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
+        // Create the dataCollapsedByCovariate tables, in which everything except the read group and the given covariate has been collapsed
        for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) {
            if( iii == 0 || !(qscore < IGNORE_QSCORES_LESS_THAN) ) { // use all data for the plot versus reported quality, but not for the other plots versus cycle and etc.
-                newKey = new ArrayList<Comparable>();
-                newKey.add( key.get(0) ); // Make a new key with the read group ...
-                Comparable theCovariateElement = key.get(iii + 1); //           and the given covariate
+                covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
+                Object theCovariateElement = key[iii + 1]; //           and the given covariate
                if( theCovariateElement != null ) {
-                    newKey.add( theCovariateElement );
-                    collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
+                    covariateCollapsedKey[1] = theCovariateElement;
+                    collapsedDatum = (RecalDatum)dataCollapsedByCovariate.get(iii).get( covariateCollapsedKey );
                    if( collapsedDatum == null ) {
-                        dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum(fullDatum) );
+                        dataCollapsedByCovariate.get(iii).put( new RecalDatum(fullDatum), covariateCollapsedKey );
                    } else {
                        collapsedDatum.combine( fullDatum );
                    }
@ -79,7 +77,7 @@ public class AnalysisDataManager {
     * @param covariate Which covariate indexes the desired collapsed HashMap
     * @return The desired collapsed HashMap
     */
-    public final NHashMap<RecalDatum> getCollapsedTable( final int covariate ) {
+    public final NestedHashMap getCollapsedTable( final int covariate ) {
        if( covariate == 0) {
            return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
        } else {
--- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@ -5,10 +5,10 @@ import org.broadinstitute.sting.utils.PackageUtils;
 import org.broadinstitute.sting.utils.xReadLines;
 import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
 import org.broadinstitute.sting.utils.cmdLine.Argument;
-import org.broadinstitute.sting.utils.NHashMap;

 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.regex.Pattern;
 import java.io.*;

@ -169,12 +169,12 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
                    " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
        }

-        ArrayList<Comparable> key = new ArrayList<Comparable>();
+        Object[] key = new Object[requestedCovariates.size()];
        Covariate cov;
        int iii;
        for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
            cov = requestedCovariates.get( iii );
-            key.add( cov.getValue( vals[iii] ) );
+            key[iii] = cov.getValue( vals[iii] );
        }
        // Create a new datum using the number of observations, number of mismatches, and reported quality score
        RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
@ -188,12 +188,11 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
        int numReadGroups = 0;

        // for each read group
-        NHashMap<RecalDatum> readGroupTable = dataManager.getCollapsedTable(0);
-        for( List<? extends Comparable> readGroupKey : readGroupTable.keySet() ) {
+        for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {

            if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
-                String readGroup = readGroupKey.get(0).toString();
-                RecalDatum readGroupDatum = readGroupTable.get(readGroupKey);
+                String readGroup = readGroupKey.toString();
+                RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
                System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations"  );
                System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0) - readGroupDatum.getEstimatedQReported()));

@ -214,17 +213,13 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
                    // Output the header
                    output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");

-                    // Loop through the covariate table looking for keys with matching read groups
-                    // BUGBUG: hopefully rewrite this to be more efficient
-                    for( List<? extends Comparable> covariateKey : dataManager.getCollapsedTable(iii).keySet() ) {
-                        if( covariateKey.get(0).toString().equals(readGroup) ) {
-                            output.print( covariateKey.get(1).toString() + "\t" );                              // Covariate
-                            RecalDatum thisDatum = dataManager.getCollapsedTable(iii).get(covariateKey);
-                            output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" );    // Qreported
-                            output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0)) + "\t" );     // Qempirical
-                            output.print( thisDatum.getNumMismatches() + "\t" );                                // nMismatches
-                            output.println( thisDatum.getNumObservations() );                                   // nBases
-                        }
+                    for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
+                        output.print( covariateKey.toString() + "\t" );                              // Covariate
+                        RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
+                        output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" );    // Qreported
+                        output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0)) + "\t" );     // Qempirical
+                        output.print( thisDatum.getNumMismatches() + "\t" );                                // nMismatches
+                        output.println( thisDatum.getNumObservations() );                                   // nBases
                    }

                    // Close the PrintStream
@ -242,11 +237,11 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
        int numReadGroups = 0;
        
        // for each read group
-        for( List<? extends Comparable> readGroupList : dataManager.getCollapsedTable(0).keySet() ) {
+        for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {

            if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {

-                String readGroup = readGroupList.get(0).toString();
+                String readGroup = readGroupKey.toString();
                System.out.println("Analyzing read group: " + readGroup);

                // for each covariate
--- a/java/src/org/broadinstitute/sting/utils/NHashMap.java
+++ b/java/src/org/broadinstitute/sting/utils/NHashMap.java
@ -1,139 +0,0 @@
-package org.broadinstitute.sting.utils;
-
-import java.util.*;
-
-/*
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * Created by IntelliJ IDEA.
- * User: rpoplin
- * Date: Oct 30, 2009
- *
- * A HashMap that maps a list of comparables to any Object <T>.
- * There is functionality for the mappings to be given back to you in sorted order.
- */
-
-public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
-
-	private static final long serialVersionUID = 1L; // Added by Eclipse
-    private ArrayList<ArrayList<Comparable>> keyLists;
-
-    public NHashMap() {
-        super();
-        keyLists = null;
-    }
-
-    public NHashMap( int initialCapacity, float loadingFactor ) {
-        super( initialCapacity, loadingFactor );
-        keyLists = null;
-    }
-
-
-    // This method is here only to help facilitate outputting the mappings in sorted order
-    public T sortedPut(List<? extends Comparable> key, T value) {
-
-        if( keyLists == null ) {
-            keyLists = new ArrayList<ArrayList<Comparable>>();
-            for( Comparable comp : key ) {
-                keyLists.add( new ArrayList<Comparable>() );
-            }
-        }
-
-        ArrayList<Comparable> thisList;
-        for( int iii = 0; iii < key.size(); iii++ ) {
-            thisList = keyLists.get( iii );
-            if( thisList == null ) {
-                thisList = new ArrayList<Comparable>();
-            }
-            if( !thisList.contains( key.get( iii ) ) ) {
-                thisList.add( key.get(iii ) );
-            }
-        }
-        return super.put( key, value );
-    }
-
-    public ArrayList<Pair<List<? extends Comparable>, T>> entrySetSorted() {
-
-        ArrayList<Pair<List<? extends Comparable>, T>> theSet = new ArrayList<Pair<List<? extends Comparable>, T>>();
-
-        for( ArrayList<Comparable> list : keyLists ) {
-            Collections.sort(list);
-        }
-
-        int[] keyIndex = new int[ keyLists.size() ];
-        int[] maxIndex = new int[ keyLists.size() ];
-        for( int iii = 0; iii < keyLists.size(); iii++ ) {
-            keyIndex[iii] = 0;
-            maxIndex[iii] = keyLists.get(iii).size();
-        }
-
-        // Try all the possible keys in sorted order, add them to the output set if they are in the hashMap
-        boolean triedAllKeys = false;
-        ArrayList<Comparable> newKey = null;
-        while( !triedAllKeys ) {
-            newKey = new ArrayList<Comparable>();
-            for( int iii = 0; iii < keyLists.size(); iii++ ) {
-                newKey.add(keyLists.get(iii).get(keyIndex[iii]));
-            }
-            T value = this.get( newKey );
-            if( value!= null ) {
-                theSet.add(new Pair<List<? extends Comparable>,T>( newKey, value ) );
-            }
-
-            // Increment the keyIndex
-            keyIndex[keyLists.size() - 1]++;
-            for( int iii = keyLists.size() - 1; iii >= 0; iii-- ) {
-                if( keyIndex[iii] >= maxIndex[iii] ) { // Carry it forward
-                    keyIndex[iii] = 0;
-                    if( iii > 0 ) {
-                        keyIndex[iii-1]++;
-                    } else {
-                        triedAllKeys = true;
-                        break;
-                    }
-                } else {
-                    break;
-                }
-            }
-        }
-        return theSet;
-    }
-
-    // Used to make the key from a list of <T> objects
-	public static <T> List<T> makeList(T... args) {
-        List<T> list = new ArrayList<T>();
-        for( T arg : args )
-        {
-            list.add(arg);
-        }
-        return list;
-    }
-}
-
-
-
-
-