diff --git a/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java index a6ca5deaa..67fd275c4 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/NoOriginalQualityScoresFilter.java @@ -32,9 +32,11 @@ import net.sf.samtools.SAMRecord; * Created by IntelliJ IDEA. * User: rpoplin * Date: Nov 19, 2009 + * + * Filter out reads that don't have Original Quality scores inside. */ public class NoOriginalQualityScoresFilter implements SamRecordFilter { - public boolean filterOut(SAMRecord rec) { - return (rec.getAttribute("OQ") == null); + public boolean filterOut( final SAMRecord read ) { + return (read.getAttribute("OQ") == null); } } diff --git a/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java index c93b39a4a..56d054b3a 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/SingleReadGroupFilter.java @@ -42,8 +42,8 @@ public class SingleReadGroupFilter implements SamRecordFilter { @Argument(fullName = "read_group_to_keep", shortName = "goodRG", doc="The name of the read group to keep, filtering out all others", required=true) private String READ_GROUP_TO_KEEP = null; - public boolean filterOut(SAMRecord read) { - SAMReadGroupRecord readGroup = read.getReadGroup(); + public boolean filterOut( final SAMRecord read ) { + final SAMReadGroupRecord readGroup = read.getReadGroup(); return !( readGroup != null && readGroup.getReadGroupId().equals( READ_GROUP_TO_KEEP ) ); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index bcc352d60..d8fba4d30 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -102,7 +102,7 @@ public class CovariateCounterWalker extends LocusWalker { private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.0.7"; // Major version, minor version, and build number + private static final String versionString = "v2.0.8"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -224,7 +224,7 @@ public class CovariateCounterWalker extends LocusWalker { } // Don't want to crash with out of heap space exception - if(estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0) { // Could be negative if overflowed + if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed estimatedCapacity = 300 * 40 * 200; } dataManager = new RecalDataManager( estimatedCapacity ); @@ -512,8 +512,9 @@ public class CovariateCounterWalker extends LocusWalker { } for( Covariate cov : requestedCovariates ) { // The "@!" is a code for TableRecalibrationWalker to recognize this line as a Covariate class name - recalTableStream.println( "@!" + cov.getClass().getSimpleName() ); + recalTableStream.print( cov.getClass().getSimpleName().split("Covariate")[0] + "," ); } + recalTableStream.println("nObservations,nMismatches,Qempirical"); } if( SORTED_OUTPUT && requestedCovariates.size() == 4 ) @@ -530,7 +531,6 @@ public class CovariateCounterWalker extends LocusWalker { // For each Covariate in the key for( Comparable comp : entry.getKey() ) { // Output the Covariate's value - if( NO_PRINT_HEADER && comp instanceof String ) { continue; } // BUGBUG recalTableStream.print( comp + "," ); } // Output the RecalDatum entry diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index 1a80f458e..764e1f6e9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -57,7 +57,7 @@ public class RecalibrationArgumentCollection { public String FORCE_PLATFORM = null; ////////////////////////////////// - // Debugging-only Arguments + // Shared Debugging-only Arguments ////////////////////////////////// @Argument(fullName="validate_old_recalibrator", shortName="validate", required=false, doc="!!!Depricated!!! Match the output of the old recalibrator exactly. FOR DEBUGGING PURPOSES ONLY.") public boolean VALIDATE_OLD_RECALIBRATOR = false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index a360609a0..68f9815bf 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -92,8 +92,8 @@ public class TableRecalibrationWalker extends ReadWalker collapsedTableKey; // The key that will be used over and over again to query the collapsed tables private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*"); private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*"); - private static final Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*"); - private static final String versionString = "v2.0.5"; // Major version, minor version, and build number + private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*"); + private static final String versionString = "v2.0.6"; // Major version, minor version, and build number private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam //--------------------------------------------------------------------------------------------------------------- @@ -148,33 +148,35 @@ public class TableRecalibrationWalker extends ReadWalker covClass : classes ) { + throw new StingException( "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RAC.RECAL_FILE ); + } else { // Found the covariate list in input file, loop through all of them and instantiate them + String[] vals = line.split(","); + for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical + boolean foundClass = false; + for( Class covClass : classes ) { + if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) { + foundClass = true; + try { + Covariate covariate = (Covariate)covClass.newInstance(); + requestedCovariates.add( covariate ); + estimatedCapacity *= covariate.estimatedNumberOfBins(); - if( line.equalsIgnoreCase( "@!" + covClass.getSimpleName() ) ) { // The "@!" was added by CovariateCounterWalker as a code to recognize covariate class names - foundClass = true; - try { - Covariate covariate = (Covariate)covClass.newInstance(); - requestedCovariates.add( covariate ); - estimatedCapacity *= covariate.estimatedNumberOfBins(); - - } catch ( InstantiationException e ) { - throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); - } catch ( IllegalAccessException e ) { - throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) ); + } catch ( InstantiationException e ) { + throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); + } catch ( IllegalAccessException e ) { + throw new StingException( String.format("Can not instantiate covariate class '%s': must have no-arg constructor.", covClass.getSimpleName()) ); + } } } - } - if( !foundClass ) { - throw new StingException( "Malformed input recalibration file. The requested covariate type (" + line + ") isn't a valid covariate option." ); + if( !foundClass ) { + throw new StingException( "Malformed input recalibration file. The requested covariate type (" + vals[iii] + ") isn't a valid covariate option." ); + } } } - } else { // Found some data + } else { // Found a line of data if( !foundAllCovariates ) { if( RAC.VALIDATE_OLD_RECALIBRATOR ) { requestedCovariates.add( new ReadGroupCovariate() ); @@ -183,8 +185,14 @@ public class TableRecalibrationWalker extends ReadWalker 300 * 40 * 200 || estimatedCapacity < 0) { // Could be negative if overflowed + if( estimatedCapacity > 300 * 40 * 200 || estimatedCapacity < 0 ) { // Could be negative if overflowed estimatedCapacity = 300 * 40 * 200; } final boolean createCollapsedTables = true; @@ -223,7 +231,12 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); Covariate cov; int iii; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index a0296497b..f49ed35cc 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -16,11 +16,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "5bb3cb7445ec223cd450b05430cb00cd" ); - //e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "dc1a1b6e99f4a47525cc1dce7b6eb1dc" ); // This file doesn't have read groups? - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "7842b33245a53b13c7b6cd4e4616ac11"); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "cbe17568c5f03516a88ff0c5ce33a87b" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "8806fb7f0e669d1abab61dc4b3d10d8f" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "16f87c9644b27c3c3fe7a963eed45d2d" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "602bc582a0e5507fee806db19f07b180"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "92b7ec1f3b5d33eb86be8298827d55f5" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "3caf6ec9c38ce78284159b279b0de0c0" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -45,12 +44,11 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { paramsFiles.put(bam, result.get(0).getAbsolutePath()); } } - + @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "d7f3e0db5ed9fefc917144a0f937d50d" ); - //e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "7be0b7a624d22187e712131f12aae546" ); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "94f2c602fef9800e270326be9faab3ad"); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "eea2e02ffe71c9a1f318d2e9cd31a103" ); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "8126b9494837d504ef1277a799267c15" ); @@ -80,7 +78,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d5962c39a53a511d7ec710d5343e7a37"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "1a2762916473bd037c72125c35d274d6"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -104,11 +102,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } - @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "ca9e1312102190bbd58f82809eabf175" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "437a6dd63c61d2814f42129ce393d7ce" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();