BAM files created by TableRecalibration now have the version number and list of covariates used appended to their header with a new 'PG' tag. Eventually the entire list of command line args will be put in there as well. Big thanks to Matt and Aaron. The integration test uses the --no_pg_tag so that the md5 doesn't change every time the version number changes.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2148 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8fbc0c8473
commit
dffa46b380
|
|
@ -110,7 +110,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
|
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
|
||||||
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
|
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
|
||||||
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
||||||
private final String versionNumber = "2.0.4"; // Major version, minor version, and build number
|
private final String versionString = "v2.0.4"; // Major version, minor version, and build number
|
||||||
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
||||||
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
||||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||||
|
|
@ -130,7 +130,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
logger.info( "CovariateCounterWalker version: " + versionNumber );
|
logger.info( "CovariateCounterWalker version: " + versionString );
|
||||||
|
|
||||||
// Get a list of all available covariates
|
// Get a list of all available covariates
|
||||||
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface( Covariate.class );
|
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface( Covariate.class );
|
||||||
|
|
@ -312,7 +312,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'. Is this true?
|
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'.
|
||||||
isNegStrand = read.getReadNegativeStrandFlag();
|
isNegStrand = read.getReadNegativeStrandFlag();
|
||||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||||
if( readGroup == null ) {
|
if( readGroup == null ) {
|
||||||
|
|
|
||||||
|
|
@ -168,6 +168,9 @@ public class RecalDataManager {
|
||||||
dataSumExpectedErrors.clear();
|
dataSumExpectedErrors.clear();
|
||||||
dataCollapsedReadGroup.clear();
|
dataCollapsedReadGroup.clear();
|
||||||
dataCollapsedQualityScore.clear();
|
dataCollapsedQualityScore.clear();
|
||||||
|
for( int iii = 0; iii < numCovariates - 2; iii++ ) {
|
||||||
|
dataCollapsedByCovariate.get(iii).clear();
|
||||||
|
}
|
||||||
dataCollapsedByCovariate.clear();
|
dataCollapsedByCovariate.clear();
|
||||||
dataSumExpectedErrors = null; // Will never need this table again
|
dataSumExpectedErrors = null; // Will never need this table again
|
||||||
dataCollapsedReadGroup = null; // Will never need this table again
|
dataCollapsedReadGroup = null; // Will never need this table again
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
|
@ -66,8 +65,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
@Argument(fullName="recal_file", shortName="recalFile", doc="Input recalibration table file generated by CountCovariates", required=true)
|
@Argument(fullName="recal_file", shortName="recalFile", doc="Input recalibration table file generated by CountCovariates", required=true)
|
||||||
private String RECAL_FILE = null;
|
private String RECAL_FILE = null;
|
||||||
@Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=false)
|
@Argument(fullName="output_bam", shortName="outputBam", doc="output BAM file", required=true)
|
||||||
private SAMFileWriter OUTPUT_BAM = null;
|
private String OUTPUT_BAM_FILE = null;
|
||||||
@Argument(fullName="preserve_qscores_less_than", shortName="pQ",
|
@Argument(fullName="preserve_qscores_less_than", shortName="pQ",
|
||||||
doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general its unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
|
doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general its unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
|
||||||
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
||||||
|
|
@ -87,8 +86,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
private boolean USE_SLX_PLATFORM = false;
|
private boolean USE_SLX_PLATFORM = false;
|
||||||
@Argument(fullName="ignore_readgroup", shortName="noRG", required=false, doc="All read groups are combined together. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
@Argument(fullName="ignore_readgroup", shortName="noRG", required=false, doc="All read groups are combined together. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||||
private boolean IGNORE_READGROUP = false;
|
private boolean IGNORE_READGROUP = false;
|
||||||
//@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||||
//private boolean NO_PG_TAG = false;
|
private boolean NO_PG_TAG = false;
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
@ -101,8 +100,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||||
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||||
private final String versionNumber = "2.0.2"; // Major version, minor version, and build number
|
private final String versionString = "v2.0.3"; // Major version, minor version, and build number
|
||||||
private boolean warnUserNullReadGroup = false; // Has the walker warned the user about null read groups yet?
|
private boolean warnUserNullReadGroup = false; // Has the walker warned the user about null read groups yet?
|
||||||
|
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -117,7 +117,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
logger.info( "TableRecalibrationWalker version: " + versionNumber );
|
logger.info( "TableRecalibrationWalker version: " + versionString );
|
||||||
|
|
||||||
// Get a list of all available covariates
|
// Get a list of all available covariates
|
||||||
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||||
|
|
@ -216,17 +216,20 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
dataManager.generateEmpiricalQualities( requestedCovariates.size(), SMOOTHING );
|
dataManager.generateEmpiricalQualities( requestedCovariates.size(), SMOOTHING );
|
||||||
logger.info( "...done!" );
|
logger.info( "...done!" );
|
||||||
|
|
||||||
// Take the header of the SAM file, tweak it by adding in the reference dictionary and specifying that the target file is unsorted.
|
// Take the header of the input SAM file and tweak it by adding in a new programRecord with the version number and list of covariates that were used
|
||||||
//SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
||||||
//SAMSequenceDictionary referenceDictionary =
|
if( !NO_PG_TAG ) {
|
||||||
// ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary();
|
SAMProgramRecord programRecord = new SAMProgramRecord( "TableRecalibrationWalker" );
|
||||||
//header.setSequenceDictionary(referenceDictionary);
|
programRecord.setProgramVersion( versionString );
|
||||||
//header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
|
programRecord.setCommandLine( "Covariates used: " + requestedCovariates );
|
||||||
|
header.addProgramRecord( programRecord );
|
||||||
|
}
|
||||||
|
|
||||||
//if( OUTPUT_BAM != null ) {
|
// Create the SAMFileWriter that we will be using to output the reads
|
||||||
// SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
if( OUTPUT_BAM_FILE != null ) {
|
||||||
// OUTPUT_BAM = factory.makeBAMWriter(header, false, new File(OUTPUT_BAM.toString()), 5); // Bam compression = 5
|
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||||
//}
|
OUTPUT_BAM = factory.makeBAMWriter( header, true, new File(OUTPUT_BAM_FILE), 5 ); // BUGBUG: Bam compression hardcoded to 5
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -482,4 +485,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the output bam file
|
||||||
|
* @param output The SAMFileWriter that outputs the bam file
|
||||||
|
*/
|
||||||
|
public void onTraversalDone(SAMFileWriter output) {
|
||||||
|
if (output != null)
|
||||||
|
output.close();
|
||||||
|
super.onTraversalDone(output);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
( bam.equals( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
|
( bam.equals( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
|
||||||
? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) +
|
? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) +
|
||||||
" -outputBam %s" +
|
" -outputBam %s" +
|
||||||
|
" --no_pg_tag" +
|
||||||
" -recalFile " + paramsFile,
|
" -recalFile " + paramsFile,
|
||||||
1, // just one output file
|
1, // just one output file
|
||||||
Arrays.asList(md5));
|
Arrays.asList(md5));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue