BAM files created by TableRecalibration now have the version number and list of covariates used appended to their header with a new 'PG' tag. Eventually the entire list of command line args will be put in there as well. Big thanks to Matt and Aaron. The integration test uses the --no_pg_tag so that the md5 doesn't change every time the version number changes.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2148 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8fbc0c8473
commit
dffa46b380
|
|
@ -110,7 +110,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
|
||||
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
|
||||
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
||||
private final String versionNumber = "2.0.4"; // Major version, minor version, and build number
|
||||
private final String versionString = "v2.0.4"; // Major version, minor version, and build number
|
||||
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
||||
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||
|
|
@ -130,7 +130,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
logger.info( "CovariateCounterWalker version: " + versionNumber );
|
||||
logger.info( "CovariateCounterWalker version: " + versionString );
|
||||
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface( Covariate.class );
|
||||
|
|
@ -312,7 +312,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
}
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true?
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'.
|
||||
isNegStrand = read.getReadNegativeStrandFlag();
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if( readGroup == null ) {
|
||||
|
|
|
|||
|
|
@ -168,6 +168,9 @@ public class RecalDataManager {
|
|||
dataSumExpectedErrors.clear();
|
||||
dataCollapsedReadGroup.clear();
|
||||
dataCollapsedQualityScore.clear();
|
||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) {
|
||||
dataCollapsedByCovariate.get(iii).clear();
|
||||
}
|
||||
dataCollapsedByCovariate.clear();
|
||||
dataSumExpectedErrors = null; // Will never need this table again
|
||||
dataCollapsedReadGroup = null; // Will never need this table again
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -66,8 +65,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
/////////////////////////////
|
||||
@Argument(fullName="recal_file", shortName="recalFile", doc="Input recalibration table file generated by CountCovariates", required=true)
|
||||
private String RECAL_FILE = null;
|
||||
@Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=false)
|
||||
private SAMFileWriter OUTPUT_BAM = null;
|
||||
@Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=true)
|
||||
private String OUTPUT_BAM_FILE = null;
|
||||
@Argument(fullName="preserve_qscores_less_than", shortName="pQ",
|
||||
doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general its unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
|
||||
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
||||
|
|
@ -87,8 +86,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private boolean USE_SLX_PLATFORM = false;
|
||||
@Argument(fullName="ignore_readgroup", shortName="noRG", required=false, doc="All read groups are combined together. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
private boolean IGNORE_READGROUP = false;
|
||||
//@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
//private boolean NO_PG_TAG = false;
|
||||
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
private boolean NO_PG_TAG = false;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
|
|
@ -101,8 +100,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||
private final String versionNumber = "2.0.2"; // Major version, minor version, and build number
|
||||
private final String versionString = "v2.0.3"; // Major version, minor version, and build number
|
||||
private boolean warnUserNullReadGroup = false; // Has the walker warned the user about null read groups yet?
|
||||
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -117,7 +117,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
logger.info( "TableRecalibrationWalker version: " + versionNumber );
|
||||
logger.info( "TableRecalibrationWalker version: " + versionString );
|
||||
|
||||
// Get a list of all available covariates
|
||||
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||
|
|
@ -216,17 +216,20 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
dataManager.generateEmpiricalQualities( requestedCovariates.size(), SMOOTHING );
|
||||
logger.info( "...done!" );
|
||||
|
||||
// Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted.
|
||||
//SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
||||
//SAMSequenceDictionary referenceDictionary =
|
||||
// ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary();
|
||||
//header.setSequenceDictionary(referenceDictionary);
|
||||
//header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
||||
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
|
||||
SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
||||
if( !NO_PG_TAG ) {
|
||||
SAMProgramRecord programRecord = new SAMProgramRecord( "TableRecalibrationWalker" );
|
||||
programRecord.setProgramVersion( versionString );
|
||||
programRecord.setCommandLine( "Covariates used: " + requestedCovariates );
|
||||
header.addProgramRecord( programRecord );
|
||||
}
|
||||
|
||||
//if( OUTPUT_BAM != null ) {
|
||||
// SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
// OUTPUT_BAM = factory.makeBAMWriter(header, false, new File(OUTPUT_BAM.toString()), 5); // Bam compression = 5
|
||||
//}
|
||||
// Create the SAMFileWriter that we will be using to output the reads
|
||||
if( OUTPUT_BAM_FILE != null ) {
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
OUTPUT_BAM = factory.makeBAMWriter( header, true, new File(OUTPUT_BAM_FILE), 5 ); // BUGBUG: Bam compression hardcoded to 5
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -482,4 +485,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the output bam file
|
||||
* @param output The SAMFileWriter that outputs the bam file
|
||||
*/
|
||||
public void onTraversalDone(SAMFileWriter output) {
|
||||
if (output != null)
|
||||
output.close();
|
||||
super.onTraversalDone(output);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
( bam.equals( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
|
||||
? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) +
|
||||
" -outputBam %s" +
|
||||
" --no_pg_tag" +
|
||||
" -recalFile " + paramsFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
|
|
|
|||
Loading…
Reference in New Issue