diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java index e107aca84..0dccab54f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java @@ -32,11 +32,11 @@ import net.sf.samtools.SAMRecord; * User: rpoplin * Date: Oct 30, 2009 * - * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read and corresponding reference bases + * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read, offset, and corresponding reference bases */ public interface Covariate { - public Comparable getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read and reference bases + public Comparable getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read, offset, and reference bases public Comparable getValue(String str); // used to get value from input file - public int estimatedNumberOfBins(); // used to estimate the amount space required in the hashmap + public int estimatedNumberOfBins(); // used to estimate the amount space required in the HashMap } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java index 1ec584ebf..404ce4959 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.PackageUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Pair; import java.io.PrintStream; import java.io.FileNotFoundException; @@ -72,10 +73,16 @@ public class CovariateCounterWalker extends LocusWalker { public boolean USE_ORIGINAL_QUALS = false; @Argument(fullName="recal_file", shortName="rf", required=false, doc="Filename for the outputted covariates table recalibration file") public String RECAL_FILE = "output.recal_data.csv"; + @Argument(fullName="validateOldRecalibrator", shortName="valor", required=false, doc="If yes will reorder the output to match the old recalibrator exactly but makes the file useless to the refactored TableRecalibrationWalker") + public boolean validateOldRecalibrator = false; protected static RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps protected static ArrayList requestedCovariates; // A list to hold the covariate objects that were requested + private long countedSites = 0; // used for reporting at the end + private long countedBases = 0; // used for reporting at the end + private long skippedSites = 0; // used for reporting at the end + /** * Parse the -cov arguments and create a list of covariates to be used here * Based on the covariates' estimates for initial capacity allocate the data hashmap @@ -99,25 +106,30 @@ public class CovariateCounterWalker extends LocusWalker { // Initialize the requested covariates by parsing the -cov argument requestedCovariates = new ArrayList(); - requestedCovariates.add( new ReadGroupCovariate() ); // Read Group Covariate is a required covariate for the recalibration calculation - requestedCovariates.add( new QualityScoreCovariate() ); // Quality Score Covariate is a required covariate for the recalibration calculation - if( COVARIATES != null ) { + if( validateOldRecalibrator ) { + requestedCovariates.add( new ReadGroupCovariate() ); + requestedCovariates.add( new CycleCovariate() ); // unfortunately the ordering here is different to match the output of the old recalibrator + requestedCovariates.add( new QualityScoreCovariate() ); + requestedCovariates.add( new DinucCovariate() ); + estimatedCapacity = 300 * 200 * 40 * 16; + } else if( COVARIATES != null ) { + int covNumber = 1; for( String requestedCovariateString : COVARIATES ) { - boolean foundClass = false; for( Class covClass : classes ) { - if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class foundClass = true; + // Read Group Covariate and Quality Score Covariate are required covariates for the recalibration calculation and must begin the list + if( (covNumber == 1 && !requestedCovariateString.equalsIgnoreCase( "ReadGroupCovariate" )) || + (covNumber == 2 && !requestedCovariateString.equalsIgnoreCase( "QualityScoreCovariate" )) ) { + throw new StingException("ReadGroupCovariate and QualityScoreCovariate are required covariates for the recalibration calculation and must begin the list" ); + } + covNumber++; try { + // Now that we've found a matching class, try to instantiate it Covariate covariate = (Covariate)covClass.newInstance(); requestedCovariates.add( covariate ); estimatedCapacity *= covariate.estimatedNumberOfBins(); // update the estimated initial capacity - - if (covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate) { - throw new StingException( "ReadGroupCovariate and QualityScoreCovariate are required covariates and are therefore added for you. Please remove them from the -cov list" ); - } - } catch ( InstantiationException e ) { throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); } catch ( IllegalAccessException e ) { @@ -130,6 +142,13 @@ public class CovariateCounterWalker extends LocusWalker { throw new StingException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." ); } } + } else { // Not validating old recalibrator and no covariates were specified by the user so add the default ones + logger.info( "Note: Using default set of covariates because none were specified." ); + requestedCovariates.add( new ReadGroupCovariate() ); + requestedCovariates.add( new QualityScoreCovariate() ); + requestedCovariates.add( new CycleCovariate() ); + requestedCovariates.add( new DinucCovariate() ); + estimatedCapacity = 300 * 40 * 200 * 16; } logger.info( "The covariates being used here: " ); @@ -172,7 +191,7 @@ public class CovariateCounterWalker extends LocusWalker { // Only use data from reads with mapping quality above specified quality value and base quality greater than zero byte[] quals = read.getBaseQualities(); - if( read.getMappingQuality() >= MIN_MAPPING_QUALITY && quals[offset] > 0) + if( read.getMappingQuality() >= MIN_MAPPING_QUALITY && quals[offset] > 0 ) { // Skip first and last bases because they don't have a dinuc count if( offset > 0 && offset < (read.getReadLength() - 1) ) { @@ -181,10 +200,13 @@ public class CovariateCounterWalker extends LocusWalker { } } + countedSites++; + } else { // We skipped over the dbSNP site + skippedSites++; } - return 1; + return 1; // This value isn't actually used anywhere } /** @@ -208,7 +230,7 @@ public class CovariateCounterWalker extends LocusWalker { if( keyElement != null ) { key.add( keyElement ); } else { - badKey = true; // covariate returned bad value, for example dinuc returns null because base = 'N' + badKey = true; // Covariate returned bad value, for example dinuc returns null because base = 'N' } } @@ -218,7 +240,11 @@ public class CovariateCounterWalker extends LocusWalker { datum = dataManager.data.get( key ); if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method - dataManager.data.put( key, datum ); + if( validateOldRecalibrator ) { + dataManager.data.myPut( key, datum ); + } else { + dataManager.data.put( key, datum ); + } } } @@ -236,6 +262,12 @@ public class CovariateCounterWalker extends LocusWalker { if( datum != null ) { // Add one to the number of observations and potentially one to the number of mismatches datum.increment( base, refBase ); + countedBases++; + } else { + if( validateOldRecalibrator ) { + countedBases++; // This line here to replicate behavior in the old recalibrator + // (reads with bad covariates [prev_base = 'N', for example] were properly tossed out but this count was still incremented) + } } } @@ -274,9 +306,6 @@ public class CovariateCounterWalker extends LocusWalker { */ public void onTraversalDone( PrintStream recalTableStream ) { out.print( "Writing raw recalibration data..." ); - for( Covariate cov : requestedCovariates ) { - recalTableStream.println( "@!" + cov.getClass().getSimpleName() ); // The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name - } outputToCSV( recalTableStream ); out.println( "...done!" ); @@ -288,15 +317,45 @@ public class CovariateCounterWalker extends LocusWalker { * @param recalTableStream the PrintStream to write out to */ private void outputToCSV( PrintStream recalTableStream ) { - // For each entry in the data hashmap - for( Map.Entry, RecalDatum> entry : dataManager.data.entrySet() ) { - // For each Covariate in the key - for( Comparable comp : entry.getKey() ) { - // Output the Covariate's value - recalTableStream.print( comp + "," ); - } - // Output the RecalDatum entry - recalTableStream.println( entry.getValue().outputToCSV() ); + + if( validateOldRecalibrator ) { + boolean collapsePos = false; + boolean collapseDinuc = false; + recalTableStream.printf("# collapsed_pos %b%n", collapsePos); + recalTableStream.printf("# collapsed_dinuc %b%n", collapseDinuc); + recalTableStream.printf("# counted_sites %d%n", countedSites); + recalTableStream.printf("# counted_bases %d%n", countedBases); + recalTableStream.printf("# skipped_sites %d%n", skippedSites); + recalTableStream.printf("# fraction_skipped 1 / %.0f bp%n", (double)countedSites / skippedSites); + recalTableStream.println("rg,pos,Qrep,dn,nBases,nMismatches,Qemp"); + for(Pair, RecalDatum> entry : dataManager.data.entrySetSorted4() ) { + // For each Covariate in the key + for( Comparable comp : entry.getFirst() ) { + // Output the Covariate's value + recalTableStream.print( comp + "," ); + } + // Output the RecalDatum entry + recalTableStream.println( entry.getSecond().outputToCSV() ); + } + } else { + recalTableStream.printf("# Counted Sites %d%n", countedSites); + recalTableStream.printf("# Counted Bases %d%n", countedBases); + recalTableStream.printf("# Skipped Sites %d%n", skippedSites); + recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites); + for( Covariate cov : requestedCovariates ) { + // The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name + recalTableStream.println( "@!" + cov.getClass().getSimpleName() ); + } + // For each entry in the data hashmap + for( Map.Entry, RecalDatum> entry : dataManager.data.entrySet() ) { + // For each Covariate in the key + for( Comparable comp : entry.getKey() ) { + // Output the Covariate's value + recalTableStream.print( comp + "," ); + } + // Output the RecalDatum entry + recalTableStream.println( entry.getValue().outputToCSV() ); + } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java index e5d2d20a8..3d85555d5 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java @@ -31,6 +31,14 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: rpoplin * Date: Oct 30, 2009 + * + * The Cycle covariate. + * For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read) + * Yet to be implemented: + * - For 454 the cycle is the number of discontinuous nucleotides seen during the length of the read + * For example, for the read: AAACCCCGAAATTTTTTT + * the cycle would be 111222234445555555 + * - For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round */ public class CycleCovariate implements Covariate { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java index 2613a26a1..9090403db 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java @@ -3,8 +3,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; -import java.util.*; - /* * Copyright (c) 2009 The Broad Institute * @@ -34,34 +32,31 @@ import java.util.*; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 3, 2009 + * + * The Dinucleotide covariate. This base and the one that came before it in the read, remembering to swap directions on cardinality if negative strand read. + * This covariate will return null if there are bases that don't belong such as 'N' or 'X'. + * This covariate will also return null if attempting to get previous base at the start of the read. */ public class DinucCovariate implements Covariate { - public static ArrayList BASES; - public DinucCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker - BASES = new ArrayList(); - BASES.add("A"); - BASES.add("G"); - BASES.add("C"); - BASES.add("T"); - BASES.add("a"); - BASES.add("g"); - BASES.add("c"); - BASES.add("t"); } public Comparable getValue(SAMRecord read, int offset, char[] refBases) { byte[] bases = read.getReadBases(); char base = (char)bases[offset]; - char prevBase = (char)bases[offset - 1]; + char prevBase; // set below if( read.getReadNegativeStrandFlag() ) { base = BaseUtils.simpleComplement(base); + if ( offset + 1 > bases.length - 1 ) { return null; } // no prevBase because at the beginning of the read prevBase = BaseUtils.simpleComplement( (char)bases[offset + 1] ); + } else { + if ( offset - 1 < 0 ) { return null; } // no prevBase because at the beginning of the read + prevBase = (char)bases[offset - 1]; } // Check if bad base, probably an 'N' - if( !BASES.contains( String.format( "%c", prevBase ) ) || !BASES.contains( String.format( "%c", base) ) ) { + if( ( BaseUtils.simpleBaseToBaseIndex(prevBase) == -1 ) || ( BaseUtils.simpleBaseToBaseIndex(base) == -1 ) ) { return null; // CovariateCounterWalker will recognize that null means skip this particular location in the read } else { return String.format("%c%c", prevBase, base); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java index 7b79fcb79..47ed8d7d8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java @@ -31,6 +31,8 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 4, 2009 + * + * The Mapping Quality covariate. */ public class MappingQualityCovariate implements Covariate { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java index 963f2490d..ec9a20aca 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java @@ -32,6 +32,10 @@ import org.broadinstitute.sting.utils.QualityUtils; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 4, 2009 + * + * The Minimum Neighborhood Quality Score covariate, originally described by Chris Hartl. + * This covariate is the minimum base quality score along the length of the read. + * BUGBUG: I don't think this is what was intended by Chris. Covariate interface must change to implement the real covariate. */ public class MinimumNQSCovariate implements Covariate { @@ -58,6 +62,7 @@ public class MinimumNQSCovariate implements Covariate { } } + // Loop over the list of qualities and find the minimum Integer minQual = (int)(quals[0]); for ( int qual : quals ) { if( qual < minQual ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java index 8dd85cbde..f46dc03c4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java @@ -1,5 +1,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.StingException; + import java.util.*; /* @@ -40,21 +43,26 @@ public class NHashMap extends HashMap, T> { public NHashMap() { super(); - keyLists = new ArrayList>(); + keyLists = null; } public NHashMap( int initialCapacity, float loadingFactor ) { super( initialCapacity, loadingFactor ); - keyLists = new ArrayList>(); + keyLists = null; } - /* // This method is here only to help facilitate direct comparison of old and refactored recalibrator. - // The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to. - // Comment out this overriding method to improve performance - public T put(List key, T value) { + // The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to. + public T myPut(List key, T value) { + if( keyLists == null ) { + keyLists = new ArrayList>(); + for( Comparable comp : key ) { + keyLists.add( new ArrayList() ); + } + } + ArrayList thisList; for( int iii = 0; iii < key.size(); iii++ ) { thisList = keyLists.get( iii ); @@ -68,27 +76,46 @@ public class NHashMap extends HashMap, T> { return super.put( key, value ); } - // This method is here only to help facilitate direct comparison of old and refactored recalibrator. - // The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to. - // Comment out this overriding method to improve performance - public Set, T>> entrySet() { - + // This method is very ugly but is here only to help facilitate direct comparison of old and refactored recalibrator. + // The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to. + @SuppressWarnings(value = "unchecked") + public ArrayList, T>> entrySetSorted4() { + + ArrayList, T>> theSet = new ArrayList, T>>(); + for( ArrayList list : keyLists ) { Collections.sort(list); } - for( ArrayList list : keyLists ) { - for( Comparable comp : list ) { - // BUGBUG: unfinished + if( keyLists.size() != 4 ) { + throw new StingException("Are you sure you want to be calling this ugly method? NHashMap.entrySetSorted4()"); + } + + ArrayList newKey = null; + for( Comparable c0 : keyLists.get(0) ) { + for( Comparable c1 : keyLists.get(1) ) { + for( Comparable c2 : keyLists.get(2) ) { + for( Comparable c3 : keyLists.get(3) ) { + newKey = new ArrayList(); + newKey.add(c0); + newKey.add(c1); + newKey.add(c2); + newKey.add(c3); + T value = this.get( newKey ); + if( value!= null ) { + theSet.add(new Pair,T>( newKey, value ) ); + } + } + } } } - - return null; + + return theSet; } - */ - public static List makeList(T... args) { + + public List makeList(T... args) { List list = new ArrayList(); for (T arg : args) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java index b2b7692d1..b26c1253e 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java @@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.QualityUtils; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 3, 2009 + * + * The Reported Quality Score covariate. */ public class QualityScoreCovariate implements Covariate { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java index beaf697c5..7fc8d3c3a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java @@ -31,6 +31,8 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: rpoplin * Date: Oct 30, 2009 + * + * The Read Group covariate. */ public class ReadGroupCovariate implements Covariate{ diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java index 63af6315a..adb8c1eaf 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java @@ -50,12 +50,12 @@ public class RecalDataManager { collapsedTablesCreated = false; } - // BUGBUG: A lot going on in this method, doing a lot of pre-calculations for use in the sequential mode calculation later + // BUGBUG: A lot going on in this method, doing a lot of pre-calculations for use in the sequential mode calculation later in TableRecalibrationWalker public void createCollapsedTables( int numCovariates ) { dataCollapsedReadGroup = new NHashMap(); dataCollapsedQualityScore = new NHashMap(); dataCollapsedByCovariate = new ArrayList>(); - for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted + for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate dataCollapsedByCovariate.add( new NHashMap() ); } dataSumExpectedErrors = new NHashMap(); @@ -78,7 +78,6 @@ public class RecalDataManager { collapsedDatum = dataCollapsedReadGroup.get( newKey ); if( collapsedDatum == null ) { dataCollapsedReadGroup.put( newKey, new RecalDatum( thisDatum ) ); - //System.out.println("Added key: " + newKey + " to the dataCollapsedReadGroup"); } else { collapsedDatum.increment( thisDatum ); } @@ -89,7 +88,6 @@ public class RecalDataManager { if( sumExpectedErrors == null ) { dataSumExpectedErrors.put( newKey, 0.0 ); } else { - //System.out.println("updated += " + QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * thisDatum.getNumObservations()); dataSumExpectedErrors.remove( newKey ); sumExpectedErrors += QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * thisDatum.getNumObservations(); dataSumExpectedErrors.put( newKey, sumExpectedErrors ); @@ -101,8 +99,7 @@ public class RecalDataManager { newKey.add( key.get(1) ); // and quality score collapsedDatum = dataCollapsedQualityScore.get( newKey ); if( collapsedDatum == null ) { - //System.out.println("Added: " + newKey + " " + newKey.hashCode()); - dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) ); + dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) ); } else { collapsedDatum.increment( thisDatum ); } @@ -127,7 +124,7 @@ public class RecalDataManager { public NHashMap getCollapsedTable( int covariate ) { if( !collapsedTablesCreated ) { - throw new StingException("Trying to get collapsed tables before they have been populated."); + throw new StingException("Trying to get collapsed tables before they have been populated. Null pointers abound."); } if( covariate == 0) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java index d67eede7a..87a5aaa8b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java @@ -69,25 +69,23 @@ public class RecalDatum { //--------------------------------------------------------------------------------------------------------------- - public RecalDatum increment( long incObservations, long incMismatches ) { + public void increment( long incObservations, long incMismatches ) { numObservations += incObservations; numMismatches += incMismatches; - return this; } - public RecalDatum increment( RecalDatum other ) { - return increment( other.numObservations, other.numMismatches ); + public void increment( RecalDatum other ) { + increment( other.numObservations, other.numMismatches ); } - public RecalDatum increment( List data ) { + public void increment( List data ) { for ( RecalDatum other : data ) { this.increment( other ); } - return this; } - public RecalDatum increment( char curBase, char ref ) { - return increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(ref) ? 0 : 1 ); // inc takes num observations, then num mismatches + public void increment( char curBase, char ref ) { + increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(ref) ? 0 : 1 ); // increment takes num observations, then num mismatches } //--------------------------------------------------------------------------------------------------------------- diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java index 60d884ff9..fe17aad2e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java @@ -71,6 +71,7 @@ public class TableRecalibrationWalker extends ReadWalker requestedCovariates; + private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*"); public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; @@ -93,7 +94,10 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); - Covariate cov; // preallocated for use in for loop below + Covariate cov; int iii; for( iii = 0; iii < requestedCovariates.size(); iii++ ) { cov = requestedCovariates.get( iii ); @@ -181,7 +182,7 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( read, iii, refBases ) ); + key.add( covariate.getValue( read, iii, refBases ) ); // BUGBUG offset should be 1 based but old recalibrator is 0 based? } if( MODE == RecalibrationMode.COMBINATORIAL ) { @@ -257,7 +258,6 @@ public class TableRecalibrationWalker extends ReadWalker %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) ); } - //System.out.println("returning: " + newQualityByte); return newQualityByte; }