AnalyzeCovariates gets the same performance improvements as the recalibrator. NHashMap class is removed completely.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2483 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9b2733a54a
commit
80658fd99e
|
|
@ -1,10 +1,9 @@
|
|||
package org.broadinstitute.sting.analyzecovariates;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.NHashMap;
|
||||
import org.broadinstitute.sting.utils.NestedHashMap;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -17,17 +16,17 @@ import java.util.List;
|
|||
|
||||
public class AnalysisDataManager {
|
||||
|
||||
private NHashMap<RecalDatum> dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
||||
private ArrayList<NHashMap<RecalDatum>> dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed
|
||||
private NestedHashMap dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
||||
private ArrayList<NestedHashMap> dataCollapsedByCovariate; // Tables where everything except read group and given covariate has been collapsed
|
||||
|
||||
AnalysisDataManager() {
|
||||
}
|
||||
|
||||
AnalysisDataManager( final int numCovariates ) {
|
||||
dataCollapsedReadGroup = new NHashMap<RecalDatum>();
|
||||
dataCollapsedByCovariate = new ArrayList<NHashMap<RecalDatum>>();
|
||||
dataCollapsedReadGroup = new NestedHashMap();
|
||||
dataCollapsedByCovariate = new ArrayList<NestedHashMap>();
|
||||
for( int iii = 0; iii < numCovariates - 1; iii++ ) { // readGroup isn't counted here, its table is separate
|
||||
dataCollapsedByCovariate.add( new NHashMap<RecalDatum>() );
|
||||
dataCollapsedByCovariate.add( new NestedHashMap() );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -37,35 +36,34 @@ public class AnalysisDataManager {
|
|||
* @param fullDatum The RecalDatum which is the data for this mapping
|
||||
* @param IGNORE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
||||
*/
|
||||
public final void addToAllTables( final List<? extends Comparable> key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {
|
||||
public final void addToAllTables( final Object[] key, final RecalDatum fullDatum, final int IGNORE_QSCORES_LESS_THAN ) {
|
||||
|
||||
int qscore = Integer.parseInt( key.get(1).toString() );
|
||||
ArrayList<Comparable> newKey;
|
||||
int qscore = Integer.parseInt( key[1].toString() );
|
||||
RecalDatum collapsedDatum;
|
||||
final Object[] readGroupCollapsedKey = new Object[1];
|
||||
final Object[] covariateCollapsedKey = new Object[2];
|
||||
|
||||
if( !(qscore < IGNORE_QSCORES_LESS_THAN) ) {
|
||||
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // Make a new key with just the read group
|
||||
collapsedDatum = dataCollapsedReadGroup.get( newKey );
|
||||
readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
|
||||
collapsedDatum = (RecalDatum)dataCollapsedReadGroup.get( readGroupCollapsedKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
|
||||
dataCollapsedReadGroup.put( new RecalDatum(fullDatum), readGroupCollapsedKey );
|
||||
} else {
|
||||
collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported
|
||||
}
|
||||
}
|
||||
|
||||
// Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
||||
// Create dataCollapsedByCovariate's, the tables where everything except read group and given covariate has been collapsed
|
||||
for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) {
|
||||
if( iii == 0 || !(qscore < IGNORE_QSCORES_LESS_THAN) ) { // use all data for the plot versus reported quality, but not for the other plots versus cycle and etc.
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // Make a new key with the read group ...
|
||||
Comparable theCovariateElement = key.get(iii + 1); // and the given covariate
|
||||
covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
|
||||
Object theCovariateElement = key[iii + 1]; // and the given covariate
|
||||
if( theCovariateElement != null ) {
|
||||
newKey.add( theCovariateElement );
|
||||
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
|
||||
covariateCollapsedKey[1] = theCovariateElement;
|
||||
collapsedDatum = (RecalDatum)dataCollapsedByCovariate.get(iii).get( covariateCollapsedKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum(fullDatum) );
|
||||
dataCollapsedByCovariate.get(iii).put( new RecalDatum(fullDatum), covariateCollapsedKey );
|
||||
} else {
|
||||
collapsedDatum.combine( fullDatum );
|
||||
}
|
||||
|
|
@ -79,7 +77,7 @@ public class AnalysisDataManager {
|
|||
* @param covariate Which covariate indexes the desired collapsed HashMap
|
||||
* @return The desired collapsed HashMap
|
||||
*/
|
||||
public final NHashMap<RecalDatum> getCollapsedTable( final int covariate ) {
|
||||
public final NestedHashMap getCollapsedTable( final int covariate ) {
|
||||
if( covariate == 0) {
|
||||
return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -5,10 +5,10 @@ import org.broadinstitute.sting.utils.PackageUtils;
|
|||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.NHashMap;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
import java.io.*;
|
||||
|
||||
|
|
@ -169,12 +169,12 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
||||
}
|
||||
|
||||
ArrayList<Comparable> key = new ArrayList<Comparable>();
|
||||
Object[] key = new Object[requestedCovariates.size()];
|
||||
Covariate cov;
|
||||
int iii;
|
||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||
cov = requestedCovariates.get( iii );
|
||||
key.add( cov.getValue( vals[iii] ) );
|
||||
key[iii] = cov.getValue( vals[iii] );
|
||||
}
|
||||
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
||||
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
||||
|
|
@ -188,12 +188,11 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
int numReadGroups = 0;
|
||||
|
||||
// for each read group
|
||||
NHashMap<RecalDatum> readGroupTable = dataManager.getCollapsedTable(0);
|
||||
for( List<? extends Comparable> readGroupKey : readGroupTable.keySet() ) {
|
||||
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
|
||||
|
||||
if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
|
||||
String readGroup = readGroupKey.get(0).toString();
|
||||
RecalDatum readGroupDatum = readGroupTable.get(readGroupKey);
|
||||
String readGroup = readGroupKey.toString();
|
||||
RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
|
||||
System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations" );
|
||||
System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0) - readGroupDatum.getEstimatedQReported()));
|
||||
|
||||
|
|
@ -214,17 +213,13 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
// Output the header
|
||||
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
|
||||
|
||||
// Loop through the covariate table looking for keys with matching read groups
|
||||
// BUGBUG: hopefully rewrite this to be more efficient
|
||||
for( List<? extends Comparable> covariateKey : dataManager.getCollapsedTable(iii).keySet() ) {
|
||||
if( covariateKey.get(0).toString().equals(readGroup) ) {
|
||||
output.print( covariateKey.get(1).toString() + "\t" ); // Covariate
|
||||
RecalDatum thisDatum = dataManager.getCollapsedTable(iii).get(covariateKey);
|
||||
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
|
||||
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0)) + "\t" ); // Qempirical
|
||||
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
|
||||
output.println( thisDatum.getNumObservations() ); // nBases
|
||||
}
|
||||
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
|
||||
output.print( covariateKey.toString() + "\t" ); // Covariate
|
||||
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
|
||||
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
|
||||
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0)) + "\t" ); // Qempirical
|
||||
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
|
||||
output.println( thisDatum.getNumObservations() ); // nBases
|
||||
}
|
||||
|
||||
// Close the PrintStream
|
||||
|
|
@ -242,11 +237,11 @@ class AnalyzeCovariatesCLP extends CommandLineProgram {
|
|||
int numReadGroups = 0;
|
||||
|
||||
// for each read group
|
||||
for( List<? extends Comparable> readGroupList : dataManager.getCollapsedTable(0).keySet() ) {
|
||||
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
|
||||
|
||||
if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
|
||||
|
||||
String readGroup = readGroupList.get(0).toString();
|
||||
String readGroup = readGroupKey.toString();
|
||||
System.out.println("Analyzing read group: " + readGroup);
|
||||
|
||||
// for each covariate
|
||||
|
|
|
|||
|
|
@ -1,139 +0,0 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*
|
||||
* A HashMap that maps a list of comparables to any Object <T>.
|
||||
* There is functionality for the mappings to be given back to you in sorted order.
|
||||
*/
|
||||
|
||||
public class NHashMap<T> extends HashMap<List<? extends Comparable>, T> {
|
||||
|
||||
private static final long serialVersionUID = 1L; // Added by Eclipse
|
||||
private ArrayList<ArrayList<Comparable>> keyLists;
|
||||
|
||||
public NHashMap() {
|
||||
super();
|
||||
keyLists = null;
|
||||
}
|
||||
|
||||
public NHashMap( int initialCapacity, float loadingFactor ) {
|
||||
super( initialCapacity, loadingFactor );
|
||||
keyLists = null;
|
||||
}
|
||||
|
||||
|
||||
// This method is here only to help facilitate outputting the mappings in sorted order
|
||||
public T sortedPut(List<? extends Comparable> key, T value) {
|
||||
|
||||
if( keyLists == null ) {
|
||||
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||
for( Comparable comp : key ) {
|
||||
keyLists.add( new ArrayList<Comparable>() );
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList<Comparable> thisList;
|
||||
for( int iii = 0; iii < key.size(); iii++ ) {
|
||||
thisList = keyLists.get( iii );
|
||||
if( thisList == null ) {
|
||||
thisList = new ArrayList<Comparable>();
|
||||
}
|
||||
if( !thisList.contains( key.get( iii ) ) ) {
|
||||
thisList.add( key.get(iii ) );
|
||||
}
|
||||
}
|
||||
return super.put( key, value );
|
||||
}
|
||||
|
||||
public ArrayList<Pair<List<? extends Comparable>, T>> entrySetSorted() {
|
||||
|
||||
ArrayList<Pair<List<? extends Comparable>, T>> theSet = new ArrayList<Pair<List<? extends Comparable>, T>>();
|
||||
|
||||
for( ArrayList<Comparable> list : keyLists ) {
|
||||
Collections.sort(list);
|
||||
}
|
||||
|
||||
int[] keyIndex = new int[ keyLists.size() ];
|
||||
int[] maxIndex = new int[ keyLists.size() ];
|
||||
for( int iii = 0; iii < keyLists.size(); iii++ ) {
|
||||
keyIndex[iii] = 0;
|
||||
maxIndex[iii] = keyLists.get(iii).size();
|
||||
}
|
||||
|
||||
// Try all the possible keys in sorted order, add them to the output set if they are in the hashMap
|
||||
boolean triedAllKeys = false;
|
||||
ArrayList<Comparable> newKey = null;
|
||||
while( !triedAllKeys ) {
|
||||
newKey = new ArrayList<Comparable>();
|
||||
for( int iii = 0; iii < keyLists.size(); iii++ ) {
|
||||
newKey.add(keyLists.get(iii).get(keyIndex[iii]));
|
||||
}
|
||||
T value = this.get( newKey );
|
||||
if( value!= null ) {
|
||||
theSet.add(new Pair<List<? extends Comparable>,T>( newKey, value ) );
|
||||
}
|
||||
|
||||
// Increment the keyIndex
|
||||
keyIndex[keyLists.size() - 1]++;
|
||||
for( int iii = keyLists.size() - 1; iii >= 0; iii-- ) {
|
||||
if( keyIndex[iii] >= maxIndex[iii] ) { // Carry it forward
|
||||
keyIndex[iii] = 0;
|
||||
if( iii > 0 ) {
|
||||
keyIndex[iii-1]++;
|
||||
} else {
|
||||
triedAllKeys = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return theSet;
|
||||
}
|
||||
|
||||
// Used to make the key from a list of <T> objects
|
||||
public static <T> List<T> makeList(T... args) {
|
||||
List<T> list = new ArrayList<T>();
|
||||
for( T arg : args )
|
||||
{
|
||||
list.add(arg);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue