Misc clean up in the recalibrator related to the nested hash map implementation. CountCovariates no longer creates the full flattened set of keys and iterates over them. The output csv file is in sorted order by default now but there is a new option -unsorted which can be used to save a little bit of run time.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2482 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4aeb50c87d
commit
9b2733a54a
|
|
@ -86,6 +86,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
@Argument(fullName="process_nth_locus", shortName="pN", required=false, doc="Only process every Nth covered locus we see.")
|
||||
private int PROCESS_EVERY_NTH_LOCUS = 1;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
@Argument(fullName="dont_sort_output", shortName="unsorted", required=false, doc="If specified, the output table recalibration csv file will be in an unsorted, arbitrary order to save some run time.")
|
||||
private boolean DONT_SORT_OUTPUT = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
|
|
@ -97,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base
|
||||
private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted.
|
||||
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
||||
private static final String versionString = "v2.2.0"; // Major version, minor version, and build number
|
||||
private static final String versionString = "v2.2.1"; // Major version, minor version, and build number
|
||||
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
||||
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||
|
|
@ -207,11 +213,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
cov.initialize( RAC ); // Initialize any covariate member variables using the shared argument collection
|
||||
}
|
||||
|
||||
// Don't want to crash with out of heap space exception
|
||||
//if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed
|
||||
// estimatedCapacity = 300 * 40 * 200;
|
||||
//}
|
||||
|
||||
dataManager = new RecalDataManager();
|
||||
}
|
||||
|
||||
|
|
@ -313,9 +314,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
*/
|
||||
private static void updateMismatchCounts(Pair<Long, Long> counts, AlignmentContext context, char ref) {
|
||||
for( PileupElement p : context.getPileup() ) {
|
||||
char readChar = (char)(p.getBase());
|
||||
int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar);
|
||||
int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
final char readChar = (char)(p.getBase());
|
||||
final int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar);
|
||||
final int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
|
||||
if( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) {
|
||||
if( readCharBaseIndex != refCharBaseIndex ) {
|
||||
|
|
@ -331,12 +332,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
* Validate the dbSNP reference mismatch rates.
|
||||
*/
|
||||
private void validateDbsnpMismatchRate() {
|
||||
if( novel_counts.second == 0 || dbSNP_counts.second == 0 ) {
|
||||
if( novel_counts.second == 0L || dbSNP_counts.second == 0L ) {
|
||||
return;
|
||||
}
|
||||
|
||||
double fractionMM_novel = (double)novel_counts.first / (double)novel_counts.second;
|
||||
double fractionMM_dbsnp = (double)dbSNP_counts.first / (double)dbSNP_counts.second;
|
||||
final double fractionMM_novel = (double)novel_counts.first / (double)novel_counts.second;
|
||||
final double fractionMM_dbsnp = (double)dbSNP_counts.first / (double)dbSNP_counts.second;
|
||||
|
||||
if( fractionMM_dbsnp < DBSNP_VS_NOVEL_MISMATCH_RATE * fractionMM_novel ) {
|
||||
Utils.warnUser("The variation rate at the supplied list of known variant sites seems suspiciously low. Please double-check that the correct ROD is being used. " +
|
||||
|
|
@ -358,23 +359,24 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
*/
|
||||
private void updateDataFromRead(final SAMRecord read, final int offset, final byte refBase) {
|
||||
|
||||
List<Object> key = new ArrayList<Object>();
|
||||
final Object[] key = new Object[requestedCovariates.size()];
|
||||
|
||||
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||
// Loop through the list of requested covariates and pick out the value from the read and offset
|
||||
int iii = 0;
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
key.add(covariate.getValue( read, offset ));
|
||||
key[iii++] = covariate.getValue( read, offset );
|
||||
}
|
||||
|
||||
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
|
||||
RecalDatum datum = (RecalDatum) dataManager.data.get( key.toArray() );
|
||||
RecalDatum datum = (RecalDatum) dataManager.data.get( key );
|
||||
if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it
|
||||
datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method
|
||||
dataManager.data.put( datum, key.toArray() );
|
||||
dataManager.data.put( datum, (Object[])key );
|
||||
}
|
||||
|
||||
// Need the bases to determine whether or not we have a mismatch
|
||||
byte base = read.getReadBases()[offset];
|
||||
long curMismatches = datum.getNumMismatches();
|
||||
final byte base = read.getReadBases()[offset];
|
||||
final long curMismatches = datum.getNumMismatches();
|
||||
|
||||
// Add one to the number of observations and potentially one to the number of mismatches
|
||||
datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad
|
||||
|
|
@ -449,15 +451,53 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
}
|
||||
recalTableStream.println("nObservations,nMismatches,Qempirical");
|
||||
|
||||
// For each entry in the data hashmap
|
||||
for( Pair<Object[], Object> entry : dataManager.data.entrySetSorted() ) {
|
||||
// For each Covariate in the key
|
||||
for( Object comp : entry.first ) {
|
||||
// Output the Covariate's value
|
||||
recalTableStream.print( comp + "," );
|
||||
if( DONT_SORT_OUTPUT ) {
|
||||
printMappings(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
||||
} else {
|
||||
printMappingsSorted(recalTableStream, 0, new Object[requestedCovariates.size()], dataManager.data.data);
|
||||
}
|
||||
}
|
||||
|
||||
private void printMappingsSorted( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
|
||||
final ArrayList<Comparable> keyList = new ArrayList<Comparable>();
|
||||
for( Object comp : data.keySet() ) {
|
||||
keyList.add((Comparable) comp);
|
||||
}
|
||||
|
||||
Collections.sort(keyList);
|
||||
|
||||
for( Comparable comp : keyList ) {
|
||||
key[curPos] = comp;
|
||||
final Object val = data.get(comp);
|
||||
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
|
||||
// For each Covariate in the key
|
||||
for( Object compToPrint : key ) {
|
||||
// Output the Covariate's value
|
||||
recalTableStream.print( compToPrint + "," );
|
||||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println( ((RecalDatum)val).outputToCSV() );
|
||||
} else { // Another layer in the nested hash map
|
||||
printMappingsSorted( recalTableStream, curPos + 1, key, (Map) val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void printMappings( final PrintStream recalTableStream, final int curPos, final Object[] key, final Map data) {
|
||||
for( Object comp : data.keySet() ) {
|
||||
key[curPos] = comp;
|
||||
final Object val = data.get(comp);
|
||||
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
|
||||
// For each Covariate in the key
|
||||
for( Object compToPrint : key ) {
|
||||
// Output the Covariate's value
|
||||
recalTableStream.print( compToPrint + "," );
|
||||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println( ((RecalDatum)val).outputToCSV() );
|
||||
} else { // Another layer in the nested hash map
|
||||
printMappings( recalTableStream, curPos + 1, key, (Map) val);
|
||||
}
|
||||
// Output the RecalDatum entry
|
||||
recalTableStream.println( ((RecalDatum)entry.second).outputToCSV() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
//-----------------------------
|
||||
|
||||
else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454"
|
||||
byte[] bases = read.getReadBases();
|
||||
final byte[] bases = read.getReadBases();
|
||||
|
||||
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
||||
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class DinucCovariate implements StandardCovariate {
|
|||
|
||||
byte base;
|
||||
byte prevBase;
|
||||
byte[] bases = read.getReadBases();
|
||||
final byte[] bases = read.getReadBases();
|
||||
// If this is a negative strand read then we need to reverse the direction for our previous base
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
// No dinuc at the beginning of the read
|
||||
|
|
@ -95,7 +95,7 @@ public class DinucCovariate implements StandardCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
|
||||
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
|
||||
if( returnDinuc.compareTo(NO_DINUC) == 0 ) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
|||
*/
|
||||
|
||||
int numAgree = 0; // The number of consecutive bases that agree with you in the previous numBack bases of the read
|
||||
byte[] bases = read.getReadBases();
|
||||
final byte[] bases = read.getReadBases();
|
||||
int iii = offset;
|
||||
if( !read.getReadNegativeStrandFlag() ) { // Forward direction
|
||||
while( iii <= bases.length-2 && bases[iii] == bases[iii+1] && numAgree < numBack ) {
|
||||
|
|
|
|||
|
|
@ -42,8 +42,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
|||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||
|
||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||
return read.getMappingQuality();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,10 +49,10 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
|||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||
|
||||
// Loop over the list of base quality scores in the window and find the minimum
|
||||
byte[] quals = read.getBaseQualities();
|
||||
final byte[] quals = read.getBaseQualities();
|
||||
int minQual = quals[offset];
|
||||
int minIndex = Math.max(offset - windowReach, 0);
|
||||
int maxIndex = Math.min(offset + windowReach, quals.length - 1);
|
||||
final int minIndex = Math.max(offset - windowReach, 0);
|
||||
final int maxIndex = Math.min(offset + windowReach, quals.length - 1);
|
||||
for ( int iii = minIndex; iii < maxIndex; iii++ ) {
|
||||
if( quals[iii] < minQual ) {
|
||||
minQual = quals[iii];
|
||||
|
|
|
|||
|
|
@ -42,8 +42,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||
|
||||
public final Comparable getValue( final SAMRecord read, final int offset ) {
|
||||
return (int)(read.getBaseQualities()[offset]);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public class RecalDataManager {
|
|||
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
|
||||
//data.put(key, thisDatum); // add the mapping to the main table
|
||||
|
||||
int qualityScore = Integer.parseInt( key[1].toString() );
|
||||
final int qualityScore = Integer.parseInt( key[1].toString() );
|
||||
final Object[] readGroupCollapsedKey = new Object[1];
|
||||
final Object[] qualityScoreCollapsedKey = new Object[2];
|
||||
final Object[] covariateCollapsedKey = new Object[3];
|
||||
|
|
@ -135,15 +135,21 @@ public class RecalDataManager {
|
|||
*/
|
||||
public final void generateEmpiricalQualities( final int smoothing ) {
|
||||
|
||||
for( Pair<Object[],Object> entry : dataCollapsedReadGroup.entrySetSorted() ) {
|
||||
((RecalDatum)entry.second).calcCombinedEmpiricalQuality(smoothing);
|
||||
}
|
||||
for( Pair<Object[],Object> entry : dataCollapsedQualityScore.entrySetSorted() ) {
|
||||
((RecalDatum)entry.second).calcCombinedEmpiricalQuality(smoothing);
|
||||
}
|
||||
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.data, smoothing);
|
||||
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.data, smoothing);
|
||||
for( NestedHashMap map : dataCollapsedByCovariate ) {
|
||||
for( Pair<Object[],Object> entry : map.entrySetSorted() ) {
|
||||
((RecalDatum)entry.second).calcCombinedEmpiricalQuality(smoothing);
|
||||
recursivelyGenerateEmpiricalQualities(map.data, smoothing);
|
||||
}
|
||||
}
|
||||
|
||||
private void recursivelyGenerateEmpiricalQualities( final Map data, final int smoothing ) {
|
||||
|
||||
for( Object comp : data.keySet() ) {
|
||||
final Object val = data.get(comp);
|
||||
if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps
|
||||
((RecalDatum)val).calcCombinedEmpiricalQuality(smoothing);
|
||||
} else { // Another layer in the nested hash map
|
||||
recursivelyGenerateEmpiricalQualities( (Map) val, smoothing);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -111,8 +111,8 @@ public class RecalDatum {
|
|||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public final double empiricalQualDouble( final int smoothing ) {
|
||||
double doubleMismatches = (double) ( numMismatches + smoothing );
|
||||
double doubleObservations = (double) ( numObservations + smoothing );
|
||||
final double doubleMismatches = (double) ( numMismatches + smoothing );
|
||||
final double doubleObservations = (double) ( numObservations + smoothing );
|
||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
if (empiricalQual > QualityUtils.MAX_REASONABLE_Q_SCORE) { empiricalQual = QualityUtils.MAX_REASONABLE_Q_SCORE; }
|
||||
return empiricalQual;
|
||||
|
|
@ -123,8 +123,8 @@ public class RecalDatum {
|
|||
|
||||
|
||||
public final byte empiricalQualByte( final int smoothing ) {
|
||||
double doubleMismatches = (double) ( numMismatches + smoothing );
|
||||
double doubleObservations = (double) ( numObservations + smoothing );
|
||||
final double doubleMismatches = (double) ( numMismatches + smoothing );
|
||||
final double doubleObservations = (double) ( numObservations + smoothing );
|
||||
return QualityUtils.probToQual( 1.0 - doubleMismatches / doubleObservations );
|
||||
}
|
||||
public final byte empiricalQualByte() { return empiricalQualByte( 0 ); } // 'default' behavior is to use smoothing value of zero
|
||||
|
|
|
|||
|
|
@ -89,12 +89,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
/////////////////////////////
|
||||
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||
private ArrayList<Covariate> requestedCovariates; // List of covariates to be used in this calculation
|
||||
private ArrayList<Comparable> fullCovariateKey; // The list that will be used over and over again to hold the full set of covariate values
|
||||
private ArrayList<Comparable> collapsedTableKey; // The key that will be used over and over again to query the collapsed tables
|
||||
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||
private static final String versionString = "v2.2.0"; // Major version, minor version, and build number
|
||||
private static final String versionString = "v2.2.1"; // Major version, minor version, and build number
|
||||
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
|
||||
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
|
||||
private static final long RANDOM_SEED = 1032861495;
|
||||
|
|
@ -138,9 +136,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
Utils.warnUser("A dbSNP rod file was specified but TableRecalibrationWalker doesn't make use of it.");
|
||||
}
|
||||
|
||||
fullCovariateKey = new ArrayList<Comparable>(); // Initialize the key only once
|
||||
collapsedTableKey = new ArrayList<Comparable>(); // Initialize the key only once
|
||||
|
||||
// Read in the covariates that were used from the input file
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
|
||||
|
|
@ -222,13 +217,17 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
logger.info( "\t" + cov.getClass().getSimpleName() );
|
||||
}
|
||||
|
||||
if( dataManager == null ) {
|
||||
throw new StingException("Can't initialize the data manager. Perhaps the recal csv file contains no data?");
|
||||
}
|
||||
|
||||
// Create the tables of empirical quality scores that will be used in the sequential calculation
|
||||
logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
|
||||
dataManager.generateEmpiricalQualities( SMOOTHING );
|
||||
logger.info( "...done!" );
|
||||
|
||||
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
|
||||
SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
||||
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
||||
if( !NO_PG_TAG ) {
|
||||
SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
|
||||
programRecord.setProgramVersion( versionString );
|
||||
|
|
@ -255,7 +254,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
// Create the SAMFileWriter that we will be using to output the reads
|
||||
if( OUTPUT_BAM_FILE != null ) {
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
OUTPUT_BAM = factory.makeBAMWriter( header, true, new File(OUTPUT_BAM_FILE), 5 ); // BUGBUG: Bam compression hardcoded to 5
|
||||
}
|
||||
}
|
||||
|
|
@ -265,7 +264,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
* @param line A line of CSV data read from the recalibration table data file
|
||||
*/
|
||||
private void addCSVData(String line) {
|
||||
String[] vals = line.split(",");
|
||||
final String[] vals = line.split(",");
|
||||
|
||||
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
||||
if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
|
||||
|
|
@ -273,17 +272,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
||||
}
|
||||
|
||||
ArrayList<Object> key = new ArrayList<Object>();
|
||||
final Object[] key = new Object[requestedCovariates.size()];
|
||||
Covariate cov;
|
||||
int iii;
|
||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||
cov = requestedCovariates.get( iii );
|
||||
key.add( cov.getValue( vals[iii] ) );
|
||||
key[iii] = cov.getValue( vals[iii] );
|
||||
}
|
||||
|
||||
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
||||
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
||||
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
||||
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
||||
dataManager.addToAllTables( key.toArray(), datum, PRESERVE_QSCORES_LESS_THAN );
|
||||
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -304,24 +304,26 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
RecalDataManager.parseSAMRecord( read, RAC );
|
||||
|
||||
byte[] originalQuals = read.getBaseQualities();
|
||||
byte[] recalQuals = originalQuals.clone();
|
||||
final byte[] recalQuals = originalQuals.clone();
|
||||
|
||||
String platform = read.getReadGroup().getPlatform();
|
||||
if( platform.equalsIgnoreCase("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
|
||||
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
|
||||
}
|
||||
|
||||
// For each base in the read
|
||||
int readLength = read.getReadLength();
|
||||
for( int iii = 0; iii < readLength; iii++ ) {
|
||||
final Object[] fullCovariateKey = new Object[requestedCovariates.size()];
|
||||
|
||||
// Get the covariate values which make up the key
|
||||
// For each base in the read
|
||||
final int readLength = read.getReadLength();
|
||||
for( int offset = 0; offset < readLength; offset++ ) {
|
||||
|
||||
// Loop through the list of requested covariates and pick out the value from the read and offset
|
||||
int iii = 0;
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
fullCovariateKey.add( covariate.getValue( read, iii ) ); // offset is zero based so passing iii is correct here
|
||||
fullCovariateKey[iii++] = covariate.getValue( read, offset );
|
||||
}
|
||||
|
||||
recalQuals[iii] = performSequentialQualityCalculation( fullCovariateKey );
|
||||
fullCovariateKey.clear();
|
||||
recalQuals[offset] = performSequentialQualityCalculation( fullCovariateKey );
|
||||
}
|
||||
|
||||
preserveQScores( originalQuals, recalQuals ); // Overwrite the work done if original quality score is too low
|
||||
|
|
@ -353,15 +355,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
* @param key The list of Comparables that were calculated from the covariates
|
||||
* @return A recalibrated quality score as a byte
|
||||
*/
|
||||
private byte performSequentialQualityCalculation( List<? extends Comparable> key ) {
|
||||
private byte performSequentialQualityCalculation(Object... key ) {
|
||||
|
||||
String readGroupKeyElement = key.get(0).toString();
|
||||
int qualityScoreKeyElement = Integer.parseInt(key.get(1).toString());
|
||||
byte qualFromRead = (byte)qualityScoreKeyElement;
|
||||
final String readGroupKeyElement = key[0].toString();
|
||||
final int qualityScoreKeyElement = Integer.parseInt(key[1].toString());
|
||||
final byte qualFromRead = (byte)qualityScoreKeyElement;
|
||||
final Object[] readGroupCollapsedKey = new Object[1];
|
||||
final Object[] qualityScoreCollapsedKey = new Object[2];
|
||||
final Object[] covariateCollapsedKey = new Object[3];
|
||||
|
||||
// The global quality shift (over the read group only)
|
||||
collapsedTableKey.add( readGroupKeyElement );
|
||||
RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( collapsedTableKey.toArray() ));
|
||||
readGroupCollapsedKey[0] = readGroupKeyElement;
|
||||
RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
|
||||
double globalDeltaQ = 0.0;
|
||||
if( globalRecalDatum != null ) {
|
||||
double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
||||
|
|
@ -370,8 +375,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
|
||||
// The shift in quality between reported and empirical
|
||||
collapsedTableKey.add( qualityScoreKeyElement );
|
||||
RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( collapsedTableKey.toArray() ));
|
||||
qualityScoreCollapsedKey[0] = readGroupKeyElement;
|
||||
qualityScoreCollapsedKey[1] = qualityScoreKeyElement;
|
||||
RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
|
||||
double deltaQReported = 0.0;
|
||||
if( qReportedRecalDatum != null ) {
|
||||
double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
||||
|
|
@ -381,19 +387,19 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// The shift in quality due to each covariate by itself in turn
|
||||
double deltaQCovariates = 0.0;
|
||||
double deltaQCovariateEmpirical;
|
||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||
collapsedTableKey.add( key.get(iii) ); // The given covariate
|
||||
RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( collapsedTableKey.toArray() ));
|
||||
covariateCollapsedKey[0] = readGroupKeyElement;
|
||||
covariateCollapsedKey[1] = qualityScoreKeyElement;
|
||||
for( int iii = 2; iii < key.length; iii++ ) {
|
||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
||||
RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
|
||||
if( covariateRecalDatum != null ) {
|
||||
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||
}
|
||||
collapsedTableKey.remove( 2 ); // This new covariate is always added in at position 2 in the collapsedTableKey list
|
||||
// The collapsedTableKey should be: < ReadGroup, Reported Quality Score, This Covariate >
|
||||
}
|
||||
|
||||
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
|
||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
final byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
|
||||
|
||||
|
||||
// Verbose printouts used to validate with old recalibrator
|
||||
|
|
@ -406,7 +412,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
|
||||
//}
|
||||
|
||||
collapsedTableKey.clear();
|
||||
return newQualityByte;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,16 +35,14 @@ import java.util.*;
|
|||
|
||||
public class NestedHashMap{
|
||||
|
||||
private final Map data = new HashMap<Object, Object>();
|
||||
private ArrayList<ArrayList<Comparable>> keyLists; // Used to output the mappings in sorted order
|
||||
public final Map data = new HashMap<Object, Object>();
|
||||
|
||||
public Object get( final Object... keys ) {
|
||||
Map map = this.data;
|
||||
for( int iii = 0; iii < keys.length; iii++ ) {
|
||||
if( iii == keys.length - 1 ) {
|
||||
return map.get(keys[iii]);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
map = (Map) map.get(keys[iii]);
|
||||
if( map == null ) { return null; }
|
||||
}
|
||||
|
|
@ -55,31 +53,11 @@ public class NestedHashMap{
|
|||
|
||||
public void put( final Object value, final Object... keys ) {
|
||||
|
||||
if( keyLists == null ) {
|
||||
keyLists = new ArrayList<ArrayList<Comparable>>();
|
||||
for( Object obj : keys ) {
|
||||
keyLists.add( new ArrayList<Comparable>() );
|
||||
}
|
||||
}
|
||||
|
||||
ArrayList<Comparable> thisList;
|
||||
for( int iii = 0; iii < keys.length; iii++ ) {
|
||||
thisList = keyLists.get( iii );
|
||||
if( thisList == null ) {
|
||||
thisList = new ArrayList<Comparable>();
|
||||
}
|
||||
if( !thisList.contains( (Comparable)keys[iii] ) ) {
|
||||
thisList.add( (Comparable)keys[iii] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Map map = this.data;
|
||||
for( int iii = 0; iii < keys.length; iii++ ) {
|
||||
if( iii == keys.length - 1 ) {
|
||||
map.put(keys[iii], value);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
Map tmp = (Map) map.get(keys[iii]);
|
||||
if( tmp == null ) {
|
||||
tmp = new HashMap();
|
||||
|
|
@ -89,53 +67,4 @@ public class NestedHashMap{
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<Pair<Object[], Object>> entrySetSorted() {
|
||||
|
||||
ArrayList<Pair<Object[], Object>> theSet = new ArrayList<Pair<Object[], Object>>();
|
||||
|
||||
for( ArrayList<Comparable> list : keyLists ) {
|
||||
Collections.sort(list);
|
||||
}
|
||||
|
||||
int[] keyIndex = new int[ keyLists.size() ];
|
||||
int[] maxIndex = new int[ keyLists.size() ];
|
||||
for( int iii = 0; iii < keyLists.size(); iii++ ) {
|
||||
keyIndex[iii] = 0;
|
||||
maxIndex[iii] = keyLists.get(iii).size();
|
||||
}
|
||||
|
||||
// Try all the possible keys in sorted order, add them to the output set if they are in the hashMap
|
||||
// BUGBUG: make this more efficient
|
||||
boolean triedAllKeys = false;
|
||||
ArrayList<Object> newKey = null;
|
||||
while( !triedAllKeys ) {
|
||||
newKey = new ArrayList<Object>();
|
||||
for( int iii = 0; iii < keyLists.size(); iii++ ) {
|
||||
newKey.add(keyLists.get(iii).get(keyIndex[iii]));
|
||||
}
|
||||
Object value = this.get( newKey.toArray() );
|
||||
if( value!= null ) {
|
||||
theSet.add(new Pair<Object[], Object>( newKey.toArray(), value ) );
|
||||
}
|
||||
|
||||
// Increment the keyIndex
|
||||
keyIndex[keyLists.size() - 1]++;
|
||||
for( int iii = keyLists.size() - 1; iii >= 0; iii-- ) {
|
||||
if( keyIndex[iii] >= maxIndex[iii] ) { // Carry it forward
|
||||
keyIndex[iii] = 0;
|
||||
if( iii > 0 ) {
|
||||
keyIndex[iii-1]++;
|
||||
} else {
|
||||
triedAllKeys = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return theSet;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue