diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java index cc298ee78..0fb6c179f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/Covariate.java @@ -43,3 +43,12 @@ public interface Covariate { public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker public int estimatedNumberOfBins(); // Used to estimate the amount space required for the full data HashMap } + +interface RequiredCovariate extends Covariate { +} + +interface StandardCovariate extends Covariate { +} + +interface ExperimentalCovariate extends Covariate { +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 87ac86afc..4a00a8b08 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -78,9 +78,11 @@ public class CovariateCounterWalker extends LocusWalker { // Command Line Arguments ///////////////////////////// @Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false) - private Boolean LIST_ONLY = false; - @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are already added for you.", required=false) + private boolean LIST_ONLY = false; + @Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false) private String[] COVARIATES = null; + @Argument(fullName="standard_covs", shortName="standard", doc="Use the standard set of covariates in addition to the ones listed using the -cov argument", required=false) + private boolean USE_STANDARD_COVARIATES = false; @Argument(fullName="process_nth_locus", shortName="pN", required=false, doc="Only process every Nth covered locus we see.") private int PROCESS_EVERY_NTH_LOCUS = 1; @@ -101,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker { private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. BUGBUG: I don't understand what is going on in this case private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.1.0"; // Major version, minor version, and build number + private static final String versionString = "v2.1.1"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -129,12 +131,14 @@ public class CovariateCounterWalker extends LocusWalker { } // Get a list of all available covariates - final List> classes = PackageUtils.getClassesImplementingInterface( Covariate.class ); - + final List> covariateClasses = PackageUtils.getClassesImplementingInterface( Covariate.class ); + final List> requiredClasses = PackageUtils.getClassesImplementingInterface( RequiredCovariate.class ); + final List> standardClasses = PackageUtils.getClassesImplementingInterface( StandardCovariate.class ); + // Print and exit if that's what was requested if ( LIST_ONLY ) { out.println( "Available covariates:" ); - for( Class covClass : classes ) { + for( Class covClass : covariateClasses ) { out.println( covClass.getSimpleName() ); } out.println(); @@ -155,44 +159,38 @@ public class CovariateCounterWalker extends LocusWalker { // Initialize the requested covariates by parsing the -cov argument - // BUGBUG: This is a mess because there are a lot of cases (validate, all, none, and supplied covList). Clean up needed. requestedCovariates = new ArrayList(); - int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one + // First add the required covariates + if( requiredClasses.size() == 2) { // readGroup and reported quality score + requestedCovariates.add( new ReadGroupCovariate() ); // Order is important here + requestedCovariates.add( new QualityScoreCovariate() ); + } else { + throw new StingException("There are more required covariates than expected. The instantiation list needs to be updated with the new required covariate and in the correct order."); + } + // Next add the standard covariates if -standard was specified by the user + if( USE_STANDARD_COVARIATES ) { + for( Class covClass : standardClasses ) { + try { + Covariate covariate = (Covariate)covClass.newInstance(); + requestedCovariates.add( covariate ); + } catch ( InstantiationException e ) { + throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); + } catch ( IllegalAccessException e ) { + throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) ); + } + } + } + // Finally parse the -cov arguments that were provided, skipping over the ones already specified if( COVARIATES != null ) { - if(COVARIATES[0].equalsIgnoreCase( "ALL" )) { // The user wants ALL covariates to be used - requestedCovariates.add( new ReadGroupCovariate() ); // First add the required covariates then add the rest by looping over all implementing classes that were found - requestedCovariates.add( new QualityScoreCovariate() ); - for( Class covClass : classes ) { - try { - Covariate covariate = (Covariate)covClass.newInstance(); - - estimatedCapacity *= covariate.estimatedNumberOfBins(); - if( !( covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate ) ) { // These were already added so don't add them again - requestedCovariates.add( covariate ); - } - } catch ( InstantiationException e ) { - throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); - } catch ( IllegalAccessException e ) { - throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) ); - } - } - } else { // The user has specified a list of several covariates - int covNumber = 1; - for( String requestedCovariateString : COVARIATES ) { - boolean foundClass = false; - for( Class covClass : classes ) { - if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class - foundClass = true; - // Read Group Covariate and Quality Score Covariate are required covariates for the recalibration calculation and must begin the list - if( (covNumber == 1 && !requestedCovariateString.equalsIgnoreCase( "ReadGroupCovariate" )) || - (covNumber == 2 && !requestedCovariateString.equalsIgnoreCase( "QualityScoreCovariate" )) ) { - throw new StingException("ReadGroupCovariate and QualityScoreCovariate are required covariates for the recalibration calculation and must begin the list" ); - } - covNumber++; + for( String requestedCovariateString : COVARIATES ) { + boolean foundClass = false; + for( Class covClass : covariateClasses ) { + if( requestedCovariateString.equalsIgnoreCase( covClass.getSimpleName() ) ) { // -cov argument matches the class name for an implementing class + foundClass = true; + if( !requiredClasses.contains( covClass ) && (!USE_STANDARD_COVARIATES || !standardClasses.contains( covClass )) ) { try { // Now that we've found a matching class, try to instantiate it Covariate covariate = (Covariate)covClass.newInstance(); - estimatedCapacity *= covariate.estimatedNumberOfBins(); requestedCovariates.add( covariate ); } catch ( InstantiationException e ) { throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); @@ -201,17 +199,12 @@ public class CovariateCounterWalker extends LocusWalker { } } } + } - if( !foundClass ) { - throw new StingException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." ); - } + if( !foundClass ) { + throw new StingException( "The requested covariate type (" + requestedCovariateString + ") isn't a valid covariate option. Use --list to see possible covariates." ); } } - } else { // No covariates were specified by the user so add the default, required ones - Utils.warnUser( "Using default set of covariates because none were specified. Using ReadGroupCovariate and QualityScoreCovariate only." ); - requestedCovariates.add( new ReadGroupCovariate() ); - requestedCovariates.add( new QualityScoreCovariate() ); - estimatedCapacity = 60 * 40; } logger.info( "The covariates being used here: " ); @@ -221,10 +214,11 @@ public class CovariateCounterWalker extends LocusWalker { } // Don't want to crash with out of heap space exception - if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed - estimatedCapacity = 300 * 40 * 200; - } - dataManager = new RecalDataManager( estimatedCapacity ); + //if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed + // estimatedCapacity = 300 * 40 * 200; + //} + + dataManager = new RecalDataManager(); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index fa53248d9..0062b4efd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -42,7 +42,7 @@ import net.sf.samtools.SAMRecord; * For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round */ -public class CycleCovariate implements Covariate { +public class CycleCovariate implements StandardCovariate { private static boolean warnedUserBadPlatform = false; private static String defaultPlatform; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index ebb55a204..1278e7292 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -40,7 +40,7 @@ import net.sf.samtools.SAMRecord; * This assumption is made to speed up the code. */ -public class DinucCovariate implements Covariate { +public class DinucCovariate implements StandardCovariate { HashMap dinucHashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java index c2f063917..ecf82131d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java @@ -38,7 +38,7 @@ import net.sf.samtools.SAMRecord; * 00100123121123456789 */ -public class HomopolymerCovariate implements Covariate { +public class HomopolymerCovariate implements ExperimentalCovariate { int numBack = 10; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java index 882785dde..0b19fd57b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java @@ -35,7 +35,7 @@ import net.sf.samtools.SAMRecord; * The Mapping Quality covariate. */ -public class MappingQualityCovariate implements Covariate { +public class MappingQualityCovariate implements ExperimentalCovariate { // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java index a396ac8c5..c97d553d2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java @@ -36,7 +36,7 @@ import net.sf.samtools.SAMRecord; * This covariate is the minimum base quality score in the read in a small window around the current base. */ -public class MinimumNQSCovariate implements Covariate { +public class MinimumNQSCovariate implements ExperimentalCovariate { private int windowReach; // How far in each direction from the current base to look diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java index 17301460a..7c71db430 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java @@ -36,7 +36,7 @@ import net.sf.samtools.SAMRecord; * This is the Solexa definition of machine cycle and the covariate that was always being used in the original version of the recalibrator. */ -public class PositionCovariate implements Covariate { +public class PositionCovariate implements ExperimentalCovariate { // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java index 329ba75e6..328f2dcd1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java @@ -37,7 +37,7 @@ import net.sf.samtools.SAMRecord; * For SOLiD this is different for each position according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf */ -public class PrimerRoundCovariate implements Covariate { +public class PrimerRoundCovariate implements ExperimentalCovariate { // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java index 0b4a9e285..a174f4477 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java @@ -35,7 +35,7 @@ import net.sf.samtools.SAMRecord; * The Reported Quality Score covariate. */ -public class QualityScoreCovariate implements Covariate { +public class QualityScoreCovariate implements RequiredCovariate { // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java index fa847bcd1..255417409 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java @@ -35,7 +35,7 @@ import net.sf.samtools.SAMRecord; * The Read Group covariate. */ -public class ReadGroupCovariate implements Covariate{ +public class ReadGroupCovariate implements RequiredCovariate{ public static final String defaultReadGroup = "DefaultReadGroup"; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java index b92217fd0..cd1551791 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TileCovariate.java @@ -30,7 +30,7 @@ import edu.mit.broad.picard.illumina.parser.IlluminaUtil; /** * @author alecw@broadinstitute.org */ -public class TileCovariate implements Covariate { +public class TileCovariate implements ExperimentalCovariate { // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) {