From 1e7ddd2d9f70cc5e752a6a4c5abaf29db3e3530a Mon Sep 17 00:00:00 2001 From: rpoplin Date: Tue, 10 Nov 2009 15:55:56 +0000 Subject: [PATCH] Added a validateOldRecalibrator option to CovariateCounterWalker which reorders the output to match the old recalibrator exactly. This facilitates direct comparison of output. Changed the -cov argument slightly to require the user to specify both ReadGroupCovariate and QualityScoreCovariate to make it more clear to the user which covariates are being used. Some speed up improvements throughout. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2010 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/Recalibration/Covariate.java | 6 +- .../Recalibration/CovariateCounterWalker.java | 111 ++++++++++++++---- .../walkers/Recalibration/CycleCovariate.java | 8 ++ .../walkers/Recalibration/DinucCovariate.java | 25 ++-- .../MappingQualityCovariate.java | 2 + .../Recalibration/MinimumNQSCovariate.java | 5 + .../gatk/walkers/Recalibration/NHashMap.java | 63 +++++++--- .../Recalibration/QualityScoreCovariate.java | 2 + .../Recalibration/ReadGroupCovariate.java | 2 + .../Recalibration/RecalDataManager.java | 11 +- .../walkers/Recalibration/RecalDatum.java | 14 +-- .../TableRecalibrationWalker.java | 18 +-- 12 files changed, 181 insertions(+), 86 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java index e107aca84..0dccab54f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java @@ -32,11 +32,11 @@ import net.sf.samtools.SAMRecord; * User: rpoplin * Date: Oct 30, 2009 * - * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read and corresponding reference bases + * The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read, offset, and corresponding reference bases */ public interface Covariate { - public Comparable getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read and reference bases + public Comparable getValue(SAMRecord read, int offset, char[] refBases); // used to pick out the value from the read, offset, and reference bases public Comparable getValue(String str); // used to get value from input file - public int estimatedNumberOfBins(); // used to estimate the amount space required in the hashmap + public int estimatedNumberOfBins(); // used to estimate the amount space required in the HashMap } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java index 1ec584ebf..404ce4959 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.PackageUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Pair; import java.io.PrintStream; import java.io.FileNotFoundException; @@ -72,10 +73,16 @@ public class CovariateCounterWalker extends LocusWalker { public boolean USE_ORIGINAL_QUALS = false; @Argument(fullName="recal_file", shortName="rf", required=false, doc="Filename for the outputted covariates table recalibration file") public String RECAL_FILE = "output.recal_data.csv"; + @Argument(fullName="validateOldRecalibrator", shortName="valor", required=false, doc="If yes will reorder the output to match the old recalibrator exactly but makes the file useless to the refactored TableRecalibrationWalker") + public boolean validateOldRecalibrator = false; protected static RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps protected static ArrayList requestedCovariates; // A list to hold the covariate objects that were requested + private long countedSites = 0; // used for reporting at the end + private long countedBases = 0; // used for reporting at the end + private long skippedSites = 0; // used for reporting at the end + /** * Parse the -cov arguments and create a list of covariates to be used here * Based on the covariates' estimates for initial capacity allocate the data hashmap @@ -99,25 +106,30 @@ public class CovariateCounterWalker extends LocusWalker { // Initialize the requested covariates by parsing the -cov argument requestedCovariates = new ArrayList(); - requestedCovariates.add( new ReadGroupCovariate() ); // Read Group Covariate is a required covariate for the recalibration calculation - requestedCovariates.add( new QualityScoreCovariate() ); // Quality Score Covariate is a required covariate for the recalibration calculation - if( COVARIATES != null ) { + if( validateOldRecalibrator ) { + requestedCovariates.add( new ReadGroupCovariate() ); + requestedCovariates.add( new CycleCovariate() ); // unfortunately the ordering here is different to match the output of the old recalibrator + requestedCovariates.add( new QualityScoreCovariate() ); + requestedCovariates.add( new DinucCovariate() ); + estimatedCapacity = 300 * 200 * 40 * 16; + } else if( COVARIATES != null ) { + int covNumber = 1; for( String requestedCovariateString : COVARIATES ) { - boolean foundClass = false; for( Class covClass : classes ) { - if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class foundClass = true; + // Read Group Covariate and Quality Score Covariate are required covariates for the recalibration calculation and must begin the list + if( (covNumber == 1 && !requestedCovariateString.equalsIgnoreCase( "ReadGroupCovariate" )) || + (covNumber == 2 && !requestedCovariateString.equalsIgnoreCase( "QualityScoreCovariate" )) ) { + throw new StingException("ReadGroupCovariate and QualityScoreCovariate are required covariates for the recalibration calculation and must begin the list" ); + } + covNumber++; try { + // Now that we've found a matching class, try to instantiate it Covariate covariate = (Covariate)covClass.newInstance(); requestedCovariates.add( covariate ); estimatedCapacity *= covariate.estimatedNumberOfBins(); // update the estimated initial capacity - - if (covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate) { - throw new StingException( "ReadGroupCovariate and QualityScoreCovariate are required covariates and are therefore added for you. Please remove them from the -cov list" ); - } - } catch ( InstantiationException e ) { throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); } catch ( IllegalAccessException e ) { @@ -130,6 +142,13 @@ public class CovariateCounterWalker extends LocusWalker { throw new StingException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." ); } } + } else { // Not validating old recalibrator and no covariates were specified by the user so add the default ones + logger.info( "Note: Using default set of covariates because none were specified." ); + requestedCovariates.add( new ReadGroupCovariate() ); + requestedCovariates.add( new QualityScoreCovariate() ); + requestedCovariates.add( new CycleCovariate() ); + requestedCovariates.add( new DinucCovariate() ); + estimatedCapacity = 300 * 40 * 200 * 16; } logger.info( "The covariates being used here: " ); @@ -172,7 +191,7 @@ public class CovariateCounterWalker extends LocusWalker { // Only use data from reads with mapping quality above specified quality value and base quality greater than zero byte[] quals = read.getBaseQualities(); - if( read.getMappingQuality() >= MIN_MAPPING_QUALITY && quals[offset] > 0) + if( read.getMappingQuality() >= MIN_MAPPING_QUALITY && quals[offset] > 0 ) { // Skip first and last bases because they don't have a dinuc count if( offset > 0 && offset < (read.getReadLength() - 1) ) { @@ -181,10 +200,13 @@ public class CovariateCounterWalker extends LocusWalker { } } + countedSites++; + } else { // We skipped over the dbSNP site + skippedSites++; } - return 1; + return 1; // This value isn't actually used anywhere } /** @@ -208,7 +230,7 @@ public class CovariateCounterWalker extends LocusWalker { if( keyElement != null ) { key.add( keyElement ); } else { - badKey = true; // covariate returned bad value, for example dinuc returns null because base = 'N' + badKey = true; // Covariate returned bad value, for example dinuc returns null because base = 'N' } } @@ -218,7 +240,11 @@ public class CovariateCounterWalker extends LocusWalker { datum = dataManager.data.get( key ); if( datum == null ) { // key doesn't exist yet in the map so make a new bucket and add it datum = new RecalDatum(); // initialized with zeros, will be incremented at end of method - dataManager.data.put( key, datum ); + if( validateOldRecalibrator ) { + dataManager.data.myPut( key, datum ); + } else { + dataManager.data.put( key, datum ); + } } } @@ -236,6 +262,12 @@ public class CovariateCounterWalker extends LocusWalker { if( datum != null ) { // Add one to the number of observations and potentially one to the number of mismatches datum.increment( base, refBase ); + countedBases++; + } else { + if( validateOldRecalibrator ) { + countedBases++; // This line here to replicate behavior in the old recalibrator + // (reads with bad covariates [prev_base = 'N', for example] were properly tossed out but this count was still incremented) + } } } @@ -274,9 +306,6 @@ public class CovariateCounterWalker extends LocusWalker { */ public void onTraversalDone( PrintStream recalTableStream ) { out.print( "Writing raw recalibration data..." ); - for( Covariate cov : requestedCovariates ) { - recalTableStream.println( "@!" + cov.getClass().getSimpleName() ); // The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name - } outputToCSV( recalTableStream ); out.println( "...done!" ); @@ -288,15 +317,45 @@ public class CovariateCounterWalker extends LocusWalker { * @param recalTableStream the PrintStream to write out to */ private void outputToCSV( PrintStream recalTableStream ) { - // For each entry in the data hashmap - for( Map.Entry, RecalDatum> entry : dataManager.data.entrySet() ) { - // For each Covariate in the key - for( Comparable comp : entry.getKey() ) { - // Output the Covariate's value - recalTableStream.print( comp + "," ); - } - // Output the RecalDatum entry - recalTableStream.println( entry.getValue().outputToCSV() ); + + if( validateOldRecalibrator ) { + boolean collapsePos = false; + boolean collapseDinuc = false; + recalTableStream.printf("# collapsed_pos %b%n", collapsePos); + recalTableStream.printf("# collapsed_dinuc %b%n", collapseDinuc); + recalTableStream.printf("# counted_sites %d%n", countedSites); + recalTableStream.printf("# counted_bases %d%n", countedBases); + recalTableStream.printf("# skipped_sites %d%n", skippedSites); + recalTableStream.printf("# fraction_skipped 1 / %.0f bp%n", (double)countedSites / skippedSites); + recalTableStream.println("rg,pos,Qrep,dn,nBases,nMismatches,Qemp"); + for(Pair, RecalDatum> entry : dataManager.data.entrySetSorted4() ) { + // For each Covariate in the key + for( Comparable comp : entry.getFirst() ) { + // Output the Covariate's value + recalTableStream.print( comp + "," ); + } + // Output the RecalDatum entry + recalTableStream.println( entry.getSecond().outputToCSV() ); + } + } else { + recalTableStream.printf("# Counted Sites %d%n", countedSites); + recalTableStream.printf("# Counted Bases %d%n", countedBases); + recalTableStream.printf("# Skipped Sites %d%n", skippedSites); + recalTableStream.printf("# Fraction Skipped 1 / %.0f bp%n", (double)countedSites / skippedSites); + for( Covariate cov : requestedCovariates ) { + // The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name + recalTableStream.println( "@!" + cov.getClass().getSimpleName() ); + } + // For each entry in the data hashmap + for( Map.Entry, RecalDatum> entry : dataManager.data.entrySet() ) { + // For each Covariate in the key + for( Comparable comp : entry.getKey() ) { + // Output the Covariate's value + recalTableStream.print( comp + "," ); + } + // Output the RecalDatum entry + recalTableStream.println( entry.getValue().outputToCSV() ); + } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java index e5d2d20a8..3d85555d5 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java @@ -31,6 +31,14 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: rpoplin * Date: Oct 30, 2009 + * + * The Cycle covariate. + * For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read) + * Yet to be implemented: + * - For 454 the cycle is the number of discontinuous nucleotides seen during the length of the read + * For example, for the read: AAACCCCGAAATTTTTTT + * the cycle would be 111222234445555555 + * - For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round */ public class CycleCovariate implements Covariate { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java index 2613a26a1..9090403db 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java @@ -3,8 +3,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; -import java.util.*; - /* * Copyright (c) 2009 The Broad Institute * @@ -34,34 +32,31 @@ import java.util.*; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 3, 2009 + * + * The Dinucleotide covariate. This base and the one that came before it in the read, remembering to swap directions on cardinality if negative strand read. + * This covariate will return null if there are bases that don't belong such as 'N' or 'X'. + * This covariate will also return null if attempting to get previous base at the start of the read. */ public class DinucCovariate implements Covariate { - public static ArrayList BASES; - public DinucCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker - BASES = new ArrayList(); - BASES.add("A"); - BASES.add("G"); - BASES.add("C"); - BASES.add("T"); - BASES.add("a"); - BASES.add("g"); - BASES.add("c"); - BASES.add("t"); } public Comparable getValue(SAMRecord read, int offset, char[] refBases) { byte[] bases = read.getReadBases(); char base = (char)bases[offset]; - char prevBase = (char)bases[offset - 1]; + char prevBase; // set below if( read.getReadNegativeStrandFlag() ) { base = BaseUtils.simpleComplement(base); + if ( offset + 1 > bases.length - 1 ) { return null; } // no prevBase because at the beginning of the read prevBase = BaseUtils.simpleComplement( (char)bases[offset + 1] ); + } else { + if ( offset - 1 < 0 ) { return null; } // no prevBase because at the beginning of the read + prevBase = (char)bases[offset - 1]; } // Check if bad base, probably an 'N' - if( !BASES.contains( String.format( "%c", prevBase ) ) || !BASES.contains( String.format( "%c", base) ) ) { + if( ( BaseUtils.simpleBaseToBaseIndex(prevBase) == -1 ) || ( BaseUtils.simpleBaseToBaseIndex(base) == -1 ) ) { return null; // CovariateCounterWalker will recognize that null means skip this particular location in the read } else { return String.format("%c%c", prevBase, base); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java index 7b79fcb79..47ed8d7d8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java @@ -31,6 +31,8 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 4, 2009 + * + * The Mapping Quality covariate. */ public class MappingQualityCovariate implements Covariate { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java index 963f2490d..ec9a20aca 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java @@ -32,6 +32,10 @@ import org.broadinstitute.sting.utils.QualityUtils; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 4, 2009 + * + * The Minimum Neighborhood Quality Score covariate, originally described by Chris Hartl. + * This covariate is the minimum base quality score along the length of the read. + * BUGBUG: I don't think this is what was intended by Chris. Covariate interface must change to implement the real covariate. */ public class MinimumNQSCovariate implements Covariate { @@ -58,6 +62,7 @@ public class MinimumNQSCovariate implements Covariate { } } + // Loop over the list of qualities and find the minimum Integer minQual = (int)(quals[0]); for ( int qual : quals ) { if( qual < minQual ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java index 8dd85cbde..f46dc03c4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/NHashMap.java @@ -1,5 +1,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; +import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.StingException; + import java.util.*; /* @@ -40,21 +43,26 @@ public class NHashMap extends HashMap, T> { public NHashMap() { super(); - keyLists = new ArrayList>(); + keyLists = null; } public NHashMap( int initialCapacity, float loadingFactor ) { super( initialCapacity, loadingFactor ); - keyLists = new ArrayList>(); + keyLists = null; } - /* // This method is here only to help facilitate direct comparison of old and refactored recalibrator. - // The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to. - // Comment out this overriding method to improve performance - public T put(List key, T value) { + // The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to. + public T myPut(List key, T value) { + if( keyLists == null ) { + keyLists = new ArrayList>(); + for( Comparable comp : key ) { + keyLists.add( new ArrayList() ); + } + } + ArrayList thisList; for( int iii = 0; iii < key.size(); iii++ ) { thisList = keyLists.get( iii ); @@ -68,27 +76,46 @@ public class NHashMap extends HashMap, T> { return super.put( key, value ); } - // This method is here only to help facilitate direct comparison of old and refactored recalibrator. - // The old recalibrator prints out it's mappings in a sorted order but the refactored recalibrator doesn't need to. - // Comment out this overriding method to improve performance - public Set, T>> entrySet() { - + // This method is very ugly but is here only to help facilitate direct comparison of old and refactored recalibrator. + // The old recalibrator prints out its mappings in a sorted order but the refactored recalibrator doesn't need to. + @SuppressWarnings(value = "unchecked") + public ArrayList, T>> entrySetSorted4() { + + ArrayList, T>> theSet = new ArrayList, T>>(); + for( ArrayList list : keyLists ) { Collections.sort(list); } - for( ArrayList list : keyLists ) { - for( Comparable comp : list ) { - // BUGBUG: unfinished + if( keyLists.size() != 4 ) { + throw new StingException("Are you sure you want to be calling this ugly method? NHashMap.entrySetSorted4()"); + } + + ArrayList newKey = null; + for( Comparable c0 : keyLists.get(0) ) { + for( Comparable c1 : keyLists.get(1) ) { + for( Comparable c2 : keyLists.get(2) ) { + for( Comparable c3 : keyLists.get(3) ) { + newKey = new ArrayList(); + newKey.add(c0); + newKey.add(c1); + newKey.add(c2); + newKey.add(c3); + T value = this.get( newKey ); + if( value!= null ) { + theSet.add(new Pair,T>( newKey, value ) ); + } + } + } } } - - return null; + + return theSet; } - */ - public static List makeList(T... args) { + + public List makeList(T... args) { List list = new ArrayList(); for (T arg : args) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java index b2b7692d1..b26c1253e 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java @@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.QualityUtils; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 3, 2009 + * + * The Reported Quality Score covariate. */ public class QualityScoreCovariate implements Covariate { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java index beaf697c5..7fc8d3c3a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java @@ -31,6 +31,8 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: rpoplin * Date: Oct 30, 2009 + * + * The Read Group covariate. */ public class ReadGroupCovariate implements Covariate{ diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java index 63af6315a..adb8c1eaf 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDataManager.java @@ -50,12 +50,12 @@ public class RecalDataManager { collapsedTablesCreated = false; } - // BUGBUG: A lot going on in this method, doing a lot of pre-calculations for use in the sequential mode calculation later + // BUGBUG: A lot going on in this method, doing a lot of pre-calculations for use in the sequential mode calculation later in TableRecalibrationWalker public void createCollapsedTables( int numCovariates ) { dataCollapsedReadGroup = new NHashMap(); dataCollapsedQualityScore = new NHashMap(); dataCollapsedByCovariate = new ArrayList>(); - for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted + for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate dataCollapsedByCovariate.add( new NHashMap() ); } dataSumExpectedErrors = new NHashMap(); @@ -78,7 +78,6 @@ public class RecalDataManager { collapsedDatum = dataCollapsedReadGroup.get( newKey ); if( collapsedDatum == null ) { dataCollapsedReadGroup.put( newKey, new RecalDatum( thisDatum ) ); - //System.out.println("Added key: " + newKey + " to the dataCollapsedReadGroup"); } else { collapsedDatum.increment( thisDatum ); } @@ -89,7 +88,6 @@ public class RecalDataManager { if( sumExpectedErrors == null ) { dataSumExpectedErrors.put( newKey, 0.0 ); } else { - //System.out.println("updated += " + QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * thisDatum.getNumObservations()); dataSumExpectedErrors.remove( newKey ); sumExpectedErrors += QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * thisDatum.getNumObservations(); dataSumExpectedErrors.put( newKey, sumExpectedErrors ); @@ -101,8 +99,7 @@ public class RecalDataManager { newKey.add( key.get(1) ); // and quality score collapsedDatum = dataCollapsedQualityScore.get( newKey ); if( collapsedDatum == null ) { - //System.out.println("Added: " + newKey + " " + newKey.hashCode()); - dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) ); + dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) ); } else { collapsedDatum.increment( thisDatum ); } @@ -127,7 +124,7 @@ public class RecalDataManager { public NHashMap getCollapsedTable( int covariate ) { if( !collapsedTablesCreated ) { - throw new StingException("Trying to get collapsed tables before they have been populated."); + throw new StingException("Trying to get collapsed tables before they have been populated. Null pointers abound."); } if( covariate == 0) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java index d67eede7a..87a5aaa8b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/RecalDatum.java @@ -69,25 +69,23 @@ public class RecalDatum { //--------------------------------------------------------------------------------------------------------------- - public RecalDatum increment( long incObservations, long incMismatches ) { + public void increment( long incObservations, long incMismatches ) { numObservations += incObservations; numMismatches += incMismatches; - return this; } - public RecalDatum increment( RecalDatum other ) { - return increment( other.numObservations, other.numMismatches ); + public void increment( RecalDatum other ) { + increment( other.numObservations, other.numMismatches ); } - public RecalDatum increment( List data ) { + public void increment( List data ) { for ( RecalDatum other : data ) { this.increment( other ); } - return this; } - public RecalDatum increment( char curBase, char ref ) { - return increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(ref) ? 0 : 1 ); // inc takes num observations, then num mismatches + public void increment( char curBase, char ref ) { + increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(ref) ? 0 : 1 ); // increment takes num observations, then num mismatches } //--------------------------------------------------------------------------------------------------------------- diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java index 60d884ff9..fe17aad2e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java @@ -71,6 +71,7 @@ public class TableRecalibrationWalker extends ReadWalker requestedCovariates; + private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*"); public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; @@ -93,7 +94,10 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); - Covariate cov; // preallocated for use in for loop below + Covariate cov; int iii; for( iii = 0; iii < requestedCovariates.size(); iii++ ) { cov = requestedCovariates.get( iii ); @@ -181,7 +182,7 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( read, iii, refBases ) ); + key.add( covariate.getValue( read, iii, refBases ) ); // BUGBUG offset should be 1 based but old recalibrator is 0 based? } if( MODE == RecalibrationMode.COMBINATORIAL ) { @@ -257,7 +258,6 @@ public class TableRecalibrationWalker extends ReadWalker %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) ); } - //System.out.println("returning: " + newQualityByte); return newQualityByte; }