Misc cleanup in the recalibrator related to the nested hash map implementation. CountCovariates no longer creates the full flattened set of keys and iterates over them. The output csv file is in sorted order by default now, but there is a new option -unsorted which can be used to save a little bit of run time.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2482 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-12-30 16:58:04 +00:00
parent 4aeb50c87d
commit 9b2733a54a
11 changed files with 140 additions and 162 deletions

View File

@ -86,6 +86,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
@Argument(fullName="process_nth_locus", shortName="pN", required=false, doc="Only process every Nth covered locus we see.")
private int PROCESS_EVERY_NTH_LOCUS = 1;
/////////////////////////////
// Debugging-only Arguments
/////////////////////////////
@Argument(fullName="dont_sort_output", shortName="unsorted", required=false, doc="If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
private boolean DONT_SORT_OUTPUT = false;
/////////////////////////////
// Private Member Variables
/////////////////////////////
@ -97,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base
private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted.
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
private static final String versionString = "v2.2.0"; // Major version, minor version, and build number
private static final String versionString = "v2.2.1"; // Major version, minor version, and build number
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
@ -207,11 +213,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
cov.initialize( RAC ); // Initialize any covariate member variables using the shared argument collection
}
// Don't want to crash with out of heap space exception
//if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed
// estimatedCapacity = 300 * 40 * 200;
//}
dataManager = new RecalDataManager();
}
@ -313,9 +314,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
*/
private static void updateMismatchCounts(Pair<Long, Long> counts, AlignmentContext context, char ref) {
for( PileupElement p : context.getPileup() ) {
char readChar = (char)(p.getBase());
int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar);
int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
final char readChar = (char)(p.getBase());
final int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar);
final int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
if( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) {
if( readCharBaseIndex != refCharBaseIndex ) {
@ -331,12 +332,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
* Validate the dbSNP reference mismatch rates.
*/
private void validateDbsnpMismatchRate() {
if( novel_counts.second == 0 || dbSNP_counts.second == 0 ) {
if( novel_counts.second == 0L || dbSNP_counts.second == 0L ) {
return;
}
double fractionMM_novel = (double)novel_counts.first / (double)novel_counts.second;
double fractionMM_dbsnp = (double)dbSNP_counts.first / (double)dbSNP_counts.second;
final double fractionMM_novel = (double)novel_counts.first / (double)novel_counts.second;
final double fractionMM_dbsnp = (double)dbSNP_counts.first / (double)dbSNP_counts.second;
if( fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel ) {
Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " +
@ -358,23 +359,24 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
*/
private void updateDataFromRead(final SAMRecord read, final int offset, final byte refBase) {
List<Object> key = new ArrayList<Object>();
final Object[] key = new Object[requestedCovariates.size()];
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
// Loop through the list of requested covariates and pick out the value from the read and offset
int iii = 0;
for( Covariate covariate : requestedCovariates ) {
key.add(covariate.getValue( read, offset ));
key[iii++] = covariate.getValue( read, offset );
}
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
RecalDatum datum = (RecalDatum) dataManager.data.get( key.toArray() );
RecalDatum datum = (RecalDatum) dataManager.data.get( key );
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
dataManager.data.put( datum, key.toArray() );
dataManager.data.put( datum, (Object[])key );
}
// Need the bases to determine whether or not we have a mismatch
byte base = read.getReadBases()[offset];
long curMismatches = datum.getNumMismatches();
final byte base = read.getReadBases()[offset];
final long curMismatches = datum.getNumMismatches();
// Add one to the number of observations and potentially one to the number of mismatches
datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char then the bytes default to the (long, long) version of the increment method which is really bad
@ -449,15 +451,53 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
}
recalTableStream.println("nObservations,nMismatches,Qempirical");
// For each entry in the data hashmap
for( Pair<Object[], Object> entry : dataManager.data.entrySetSorted() ) {
// For each Covariate in the key
for( Object comp : entry.first ) {
// Output the Covariate's value
recalTableStream.print( comp + "," );
if( DONT_SORT_OUTPUT ) {
printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
} else {
printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
}
}
/**
 * Recursively walk the nested hash map and print each (covariate key, RecalDatum) pair
 * as a CSV line, visiting the keys of every level in natural sorted order.
 * @param recalTableStream  destination stream for the recalibration table CSV lines
 * @param curPos            depth in the nested map == index into the shared key buffer
 * @param key               scratch buffer holding the covariate values accumulated so far
 * @param data              the current level of the nested hash map
 */
private void printMappingsSorted( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
    // Snapshot this level's keys and order them by their natural Comparable ordering
    final ArrayList<Comparable> sortedKeys = new ArrayList<Comparable>( data.keySet() );
    Collections.sort( sortedKeys );
    for( Comparable sortedKey : sortedKeys ) {
        key[curPos] = sortedKey;
        final Object value = data.get( sortedKey );
        if( value instanceof RecalDatum ) { // Leaf level: emit the accumulated key followed by the datum's CSV fields
            final StringBuilder line = new StringBuilder();
            for( Object keyElement : key ) {
                line.append( keyElement ).append( ',' );
            }
            line.append( ((RecalDatum) value).outputToCSV() );
            recalTableStream.println( line.toString() );
        } else { // Interior level: descend into the next layer of the nested hash map
            printMappingsSorted( recalTableStream, curPos + 1, key, (Map) value );
        }
    }
}
private void printMappings( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
for( Object comp : data.keySet() ) {
key[curPos] = comp;
final Object val = data.get(comp);
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
// For each Covariate in the key
for( Object compToPrint : key ) {
// Output the Covariate's value
recalTableStream.print( compToPrint + "," );
}
// Output the RecalDatum entry
recalTableStream.println( ((RecalDatum)val).outputToCSV() );
} else { // Another layer in the nested hash map
printMappings( recalTableStream, curPos + 1, key, (Map) val);
}
// Output the RecalDatum entry
recalTableStream.println( ((RecalDatum)entry.second).outputToCSV() );
}
}
}

View File

@ -78,7 +78,7 @@ public class CycleCovariate implements StandardCovariate {
//-----------------------------
else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454"
byte[] bases = read.getReadBases();
final byte[] bases = read.getReadBases();
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one

View File

@ -65,7 +65,7 @@ public class DinucCovariate implements StandardCovariate {
byte base;
byte prevBase;
byte[] bases = read.getReadBases();
final byte[] bases = read.getReadBases();
// If this is a negative strand read then we need to reverse the direction for our previous base
if( read.getReadNegativeStrandFlag() ) {
// No dinuc at the beginning of the read
@ -95,7 +95,7 @@ public class DinucCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
public final Comparable getValue( final String str ) {
Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
if( returnDinuc.compareTo(NO_DINUC) == 0 ) {
return null;
}

View File

@ -73,7 +73,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
*/
int numAgree = 0; // The number of consecutive bases that agree with you in the previous numBack bases of the read
byte[] bases = read.getReadBases();
final byte[] bases = read.getReadBases();
int iii = offset;
if( !read.getReadNegativeStrandFlag() ) { // Forward direction
while( iii <= bases.length-2 && bases[iii] == bases[iii+1] && numAgree < numBack ) {

View File

@ -42,8 +42,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
}
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
public final Comparable getValue( final SAMRecord read, final int offset ) {
return read.getMappingQuality();
}

View File

@ -49,10 +49,10 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
public final Comparable getValue( final SAMRecord read, final int offset ) {
// Loop over the list of base quality scores in the window and find the minimum
byte[] quals = read.getBaseQualities();
final byte[] quals = read.getBaseQualities();
int minQual = quals[offset];
int minIndex = Math.max(offset - windowReach, 0);
int maxIndex = Math.min(offset + windowReach, quals.length - 1);
final int minIndex = Math.max(offset - windowReach, 0);
final int maxIndex = Math.min(offset + windowReach, quals.length - 1);
for ( int iii = minIndex; iii < maxIndex; iii++ ) {
if( quals[iii] < minQual ) {
minQual = quals[iii];

View File

@ -42,8 +42,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
}
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
public final Comparable getValue( final SAMRecord read, final int offset ) {
return (int)(read.getBaseQualities()[offset]);
}

View File

@ -84,7 +84,7 @@ public class RecalDataManager {
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
//data.put(key, thisDatum); // add the mapping to the main table
int qualityScore = Integer.parseInt( key[1].toString() );
final int qualityScore = Integer.parseInt( key[1].toString() );
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] qualityScoreCollapsedKey = new Object[2];
final Object[] covariateCollapsedKey = new Object[3];
@ -135,15 +135,21 @@ public class RecalDataManager {
*/
public final void generateEmpiricalQualities( final int smoothing ) {
for( Pair<Object[],Object> entry : dataCollapsedReadGroup.entrySetSorted() ) {
((RecalDatum)entry.second).calcCombinedEmpiricalQuality(smoothing);
}
for( Pair<Object[],Object> entry : dataCollapsedQualityScore.entrySetSorted() ) {
((RecalDatum)entry.second).calcCombinedEmpiricalQuality(smoothing);
}
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing);
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing);
for( NestedHashMap map : dataCollapsedByCovariate ) {
for( Pair<Object[],Object> entry : map.entrySetSorted() ) {
((RecalDatum)entry.second).calcCombinedEmpiricalQuality(smoothing);
recursivelyGenerateEmpiricalQualities(map.data, smoothing);
}
}
/**
 * Depth-first traversal of the nested hash map, applying the smoothed empirical
 * quality calculation to every RecalDatum leaf that is found.
 * @param data       the current level of the nested hash map
 * @param smoothing  smoothing term forwarded to calcCombinedEmpiricalQuality
 */
private void recursivelyGenerateEmpiricalQualities( final Map data, final int smoothing ) {
    for( Object entryObj : data.entrySet() ) {
        final Map.Entry entry = (Map.Entry) entryObj;
        final Object value = entry.getValue();
        if( value instanceof RecalDatum ) { // Leaf level: a single datum to smooth
            ((RecalDatum) value).calcCombinedEmpiricalQuality( smoothing );
        } else { // Interior level: recurse into the next layer of the nested hash map
            recursivelyGenerateEmpiricalQualities( (Map) value, smoothing );
        }
    }
}

View File

@ -111,8 +111,8 @@ public class RecalDatum {
//---------------------------------------------------------------------------------------------------------------
public final double empiricalQualDouble( final int smoothing ) {
double doubleMismatches = (double) ( numMismatches + smoothing );
double doubleObservations = (double) ( numObservations + smoothing );
final double doubleMismatches = (double) ( numMismatches + smoothing );
final double doubleObservations = (double) ( numObservations + smoothing );
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
if (empiricalQual > QualityUtils.MAX_REASONABLE_Q_SCORE) { empiricalQual = QualityUtils.MAX_REASONABLE_Q_SCORE; }
return empiricalQual;
@ -123,8 +123,8 @@ public class RecalDatum {
public final byte empiricalQualByte( final int smoothing ) {
double doubleMismatches = (double) ( numMismatches + smoothing );
double doubleObservations = (double) ( numObservations + smoothing );
final double doubleMismatches = (double) ( numMismatches + smoothing );
final double doubleObservations = (double) ( numObservations + smoothing );
return QualityUtils.probToQual( 1.0 - doubleMismatches / doubleObservations );
}
public final byte empiricalQualByte() { return empiricalQualByte( 0 ); } // 'default' behavior is to use smoothing value of zero

View File

@ -89,12 +89,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
/////////////////////////////
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private ArrayList<Covariate> requestedCovariates; // List of covariates to be used in this calculation
private ArrayList<Comparable> fullCovariateKey; // The list that will be used over and over again to hold the full set of covariate values
private ArrayList<Comparable> collapsedTableKey; // The key that will be used over and over again to query the collapsed tables
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
private static final String versionString = "v2.2.0"; // Major version, minor version, and build number
private static final String versionString = "v2.2.1"; // Major version, minor version, and build number
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
private static final long RANDOM_SEED = 1032861495;
@ -138,9 +136,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
}
fullCovariateKey = new ArrayList<Comparable>(); // Initialize the key only once
collapsedTableKey = new ArrayList<Comparable>(); // Initialize the key only once
// Read in the covariates that were used from the input file
requestedCovariates = new ArrayList<Covariate>();
@ -222,13 +217,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
logger.info( "\t" + cov.getClass().getSimpleName() );
}
if( dataManager == null ) {
throw new StingException("Can't initialize the data manager. Perhaps the recal csv file contains no data?");
}
// Create the tables of empirical quality scores that will be used in the sequential calculation
logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
dataManager.generateEmpiricalQualities( SMOOTHING );
logger.info( "...done!" );
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
if( !NO_PG_TAG ) {
SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
programRecord.setProgramVersion( versionString );
@ -255,7 +254,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// Create the SAMFileWriter that we will be using to output the reads
if( OUTPUT_BAM_FILE != null ) {
SAMFileWriterFactory factory = new SAMFileWriterFactory();
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
OUTPUT_BAM = factory.makeBAMWriter( header, true, new File(OUTPUT_BAM_FILE), 5 ); // BUGBUG: Bam compression hardcoded to 5
}
}
@ -265,7 +264,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
* @param line A line of CSV data read from the recalibration table data file
*/
private void addCSVData(String line) {
String[] vals = line.split(",");
final String[] vals = line.split(",");
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
@ -273,17 +272,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
}
ArrayList<Object> key = new ArrayList<Object>();
final Object[] key = new Object[requestedCovariates.size()];
Covariate cov;
int iii;
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
cov = requestedCovariates.get( iii );
key.add( cov.getValue( vals[iii] ) );
key[iii] = cov.getValue( vals[iii] );
}
// Create a new datum using the number of observations, number of mismatches, and reported quality score
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
// Add that datum to all the collapsed tables which will be used in the sequential calculation
dataManager.addToAllTables( key.toArray(), datum, PRESERVE_QSCORES_LESS_THAN );
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
}
@ -304,24 +304,26 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
RecalDataManager.parseSAMRecord( read, RAC );
byte[] originalQuals = read.getBaseQualities();
byte[] recalQuals = originalQuals.clone();
final byte[] recalQuals = originalQuals.clone();
String platform = read.getReadGroup().getPlatform();
if( platform.equalsIgnoreCase("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
}
// For each base in the read
int readLength = read.getReadLength();
for( int iii = 0; iii < readLength; iii++ ) {
final Object[] fullCovariateKey = new Object[requestedCovariates.size()];
// Get the covariate values which make up the key
// For each base in the read
final int readLength = read.getReadLength();
for( int offset = 0; offset < readLength; offset++ ) {
// Loop through the list of requested covariates and pick out the value from the read and offset
int iii = 0;
for( Covariate covariate : requestedCovariates ) {
fullCovariateKey.add( covariate.getValue( read, iii ) ); // offset is zero based so passing iii is correct here
fullCovariateKey[iii++] = covariate.getValue( read, offset );
}
recalQuals[iii] = performSequentialQualityCalculation( fullCovariateKey );
fullCovariateKey.clear();
recalQuals[offset] = performSequentialQualityCalculation( fullCovariateKey );
}
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
@ -353,15 +355,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
* @param key The list of Comparables that were calculated from the covariates
* @return A recalibrated quality score as a byte
*/
private byte performSequentialQualityCalculation( List<? extends Comparable> key ) {
private byte performSequentialQualityCalculation(Object... key ) {
String readGroupKeyElement = key.get(0).toString();
int qualityScoreKeyElement = Integer.parseInt(key.get(1).toString());
byte qualFromRead = (byte)qualityScoreKeyElement;
final String readGroupKeyElement = key[0].toString();
final int qualityScoreKeyElement = Integer.parseInt(key[1].toString());
final byte qualFromRead = (byte)qualityScoreKeyElement;
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] qualityScoreCollapsedKey = new Object[2];
final Object[] covariateCollapsedKey = new Object[3];
// The global quality shift (over the read group only)
collapsedTableKey.add( readGroupKeyElement );
RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( collapsedTableKey.toArray() ));
readGroupCollapsedKey[0] = readGroupKeyElement;
RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
double globalDeltaQ = 0.0;
if( globalRecalDatum != null ) {
double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
@ -370,8 +375,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
}
// The shift in quality between reported and empirical
collapsedTableKey.add( qualityScoreKeyElement );
RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( collapsedTableKey.toArray() ));
qualityScoreCollapsedKey[0] = readGroupKeyElement;
qualityScoreCollapsedKey[1] = qualityScoreKeyElement;
RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
double deltaQReported = 0.0;
if( qReportedRecalDatum != null ) {
double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
@ -381,19 +387,19 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// The shift in quality due to each covariate by itself in turn
double deltaQCovariates = 0.0;
double deltaQCovariateEmpirical;
for( int iii = 2; iii < key.size(); iii++ ) {
collapsedTableKey.add( key.get(iii) ); // The given covariate
RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( collapsedTableKey.toArray() ));
covariateCollapsedKey[0] = readGroupKeyElement;
covariateCollapsedKey[1] = qualityScoreKeyElement;
for( int iii = 2; iii < key.length; iii++ ) {
covariateCollapsedKey[2] = key[iii]; // The given covariate
RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
if( covariateRecalDatum != null ) {
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
}
collapsedTableKey.remove( 2 ); // This new covariate is always added in at position 2 in the collapsedTableKey list
// The collapsedTableKey should be: < ReadGroup, Reported Quality Score, This Covariate >
}
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
final byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
// Verbose printouts used to validate with old recalibrator
@ -406,7 +412,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
//}
collapsedTableKey.clear();
return newQualityByte;
}

View File

@ -35,16 +35,14 @@ import java.util.*;
public class NestedHashMap{
private final Map data = new HashMap<Object, Object>();
private ArrayList<ArrayList<Comparable>> keyLists; // Used to output the mappings in sorted order
public final Map data = new HashMap<Object, Object>();
public Object get( final Object... keys ) {
Map map = this.data;
for( int iii = 0; iii < keys.length; iii++ ) {
if( iii == keys.length - 1 ) {
return map.get(keys[iii]);
}
else {
} else {
map = (Map) map.get(keys[iii]);
if( map == null ) { return null; }
}
@ -55,31 +53,11 @@ public class NestedHashMap{
public void put( final Object value, final Object... keys ) {
if( keyLists == null ) {
keyLists = new ArrayList<ArrayList<Comparable>>();
for( Object obj : keys ) {
keyLists.add( new ArrayList<Comparable>() );
}
}
ArrayList<Comparable> thisList;
for( int iii = 0; iii < keys.length; iii++ ) {
thisList = keyLists.get( iii );
if( thisList == null ) {
thisList = new ArrayList<Comparable>();
}
if( !thisList.contains( (Comparable)keys[iii] ) ) {
thisList.add( (Comparable)keys[iii] );
}
}
Map map = this.data;
for( int iii = 0; iii < keys.length; iii++ ) {
if( iii == keys.length - 1 ) {
map.put(keys[iii], value);
}
else {
} else {
Map tmp = (Map) map.get(keys[iii]);
if( tmp == null ) {
tmp = new HashMap();
@ -89,53 +67,4 @@ public class NestedHashMap{
}
}
}
public ArrayList<Pair<Object[], Object>> entrySetSorted() {
ArrayList<Pair<Object[], Object>> theSet = new ArrayList<Pair<Object[], Object>>();
for( ArrayList<Comparable> list : keyLists ) {
Collections.sort(list);
}
int[] keyIndex = new int[ keyLists.size() ];
int[] maxIndex = new int[ keyLists.size() ];
for( int iii = 0; iii < keyLists.size(); iii++ ) {
keyIndex[iii] = 0;
maxIndex[iii] = keyLists.get(iii).size();
}
// Try all the possible keys in sorted order, add them to the output set if they are in the hashMap
// BUGBUG: make this more efficient
boolean triedAllKeys = false;
ArrayList<Object> newKey = null;
while( !triedAllKeys ) {
newKey = new ArrayList<Object>();
for( int iii = 0; iii < keyLists.size(); iii++ ) {
newKey.add(keyLists.get(iii).get(keyIndex[iii]));
}
Object value = this.get( newKey.toArray() );
if( value!= null ) {
theSet.add(new Pair<Object[], Object>( newKey.toArray(), value ) );
}
// Increment the keyIndex
keyIndex[keyLists.size() - 1]++;
for( int iii = keyLists.size() - 1; iii >= 0; iii-- ) {
if( keyIndex[iii] >= maxIndex[iii] ) { // Carry it forward
keyIndex[iii] = 0;
if( iii > 0 ) {
keyIndex[iii-1]++;
} else {
triedAllKeys = true;
break;
}
} else {
break;
}
}
}
return theSet;
}
}