BAM files created by TableRecalibration now have the version number and the list of covariates used appended to their header in a new 'PG' tag. Eventually the entire list of command-line arguments will be put in there as well. Big thanks to Matt and Aaron. The integration test uses the --no_pg_tag option so that the MD5 doesn't change every time the version number changes.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2148 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-24 20:53:57 +00:00
parent 8fbc0c8473
commit dffa46b380
4 changed files with 37 additions and 20 deletions

View File

@ -110,7 +110,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
private final String versionNumber = "2.0.4"; // Major version, minor version, and build number
private final String versionString = "v2.0.4"; // Major version, minor version, and build number
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
@ -130,7 +130,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
*/
public void initialize() {
logger.info( "CovariateCounterWalker version: " + versionNumber );
logger.info( "CovariateCounterWalker version: " + versionString );
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface( Covariate.class );
@ -312,7 +312,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
}
}
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true?
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'.
isNegStrand = read.getReadNegativeStrandFlag();
final SAMReadGroupRecord readGroup = read.getReadGroup();
if( readGroup == null ) {

View File

@ -168,6 +168,9 @@ public class RecalDataManager {
dataSumExpectedErrors.clear();
dataCollapsedReadGroup.clear();
dataCollapsedQualityScore.clear();
for( int iii = 0; iii < numCovariates - 2; iii++ ) {
dataCollapsedByCovariate.get(iii).clear();
}
dataCollapsedByCovariate.clear();
dataSumExpectedErrors = null; // Will never need this table again
dataCollapsedReadGroup = null; // Will never need this table again

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.*;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -66,8 +65,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
/////////////////////////////
@Argument(fullName="recal_file", shortName="recalFile", doc="Input recalibration table file generated by CountCovariates", required=true)
private String RECAL_FILE = null;
@Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=false)
private SAMFileWriter OUTPUT_BAM = null;
@Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=true)
private String OUTPUT_BAM_FILE = null;
@Argument(fullName="preserve_qscores_less_than", shortName="pQ",
doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general its unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
private int PRESERVE_QSCORES_LESS_THAN = 5;
@ -87,8 +86,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private boolean USE_SLX_PLATFORM = false;
@Argument(fullName="ignore_readgroup", shortName="noRG", required=false, doc="All read groups are combined together. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
private boolean IGNORE_READGROUP = false;
//@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
//private boolean NO_PG_TAG = false;
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
private boolean NO_PG_TAG = false;
/////////////////////////////
@ -101,8 +100,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
private final String versionNumber = "2.0.2"; // Major version, minor version, and build number
private final String versionString = "v2.0.3"; // Major version, minor version, and build number
private boolean warnUserNullReadGroup = false; // Has the walker warned the user about null read groups yet?
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
//---------------------------------------------------------------------------------------------------------------
//
@ -117,7 +117,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
*/
public void initialize() {
logger.info( "TableRecalibrationWalker version: " + versionNumber );
logger.info( "TableRecalibrationWalker version: " + versionString );
// Get a list of all available covariates
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
@ -216,17 +216,20 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
dataManager.generateEmpiricalQualities( requestedCovariates.size(), SMOOTHING );
logger.info( "...done!" );
// Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted.
//SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
//SAMSequenceDictionary referenceDictionary =
// ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary();
//header.setSequenceDictionary(referenceDictionary);
//header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
if( !NO_PG_TAG ) {
SAMProgramRecord programRecord = new SAMProgramRecord( "TableRecalibrationWalker" );
programRecord.setProgramVersion( versionString );
programRecord.setCommandLine( "Covariates used: " + requestedCovariates );
header.addProgramRecord( programRecord );
}
//if( OUTPUT_BAM != null ) {
// SAMFileWriterFactory factory = new SAMFileWriterFactory();
// OUTPUT_BAM = factory.makeBAMWriter(header, false, new File(OUTPUT_BAM.toString()), 5); // Bam compression = 5
//}
// Create the SAMFileWriter that we will be using to output the reads
if( OUTPUT_BAM_FILE != null ) {
SAMFileWriterFactory factory = new SAMFileWriterFactory();
OUTPUT_BAM = factory.makeBAMWriter( header, true, new File(OUTPUT_BAM_FILE), 5 ); // BUGBUG: Bam compression hardcoded to 5
}
}
/**
@ -482,4 +485,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
return output;
}
/**
 * Finish the traversal: close the BAM writer, if there is one, and then
 * hand the same writer to the superclass implementation.
 * @param output The SAMFileWriter that outputs the bam file; may be null, in which case nothing is closed
 */
public void onTraversalDone(SAMFileWriter output) {
    final boolean hasWriter = ( output != null );
    if( hasWriter ) {
        // Close the writer before delegating to the superclass
        output.close();
    }
    super.onTraversalDone( output );
}
}

View File

@ -66,6 +66,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
( bam.equals( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) +
" -outputBam %s" +
" --no_pg_tag" +
" -recalFile " + paramsFile,
1, // just one output file
Arrays.asList(md5));