Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
418a4d541f
|
|
@ -25,15 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import org.broad.tribble.bed.BEDCodec;
|
||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Gather;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -42,8 +37,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -94,13 +87,18 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
/////////////////////////////
|
||||
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
/////////////////////////////
|
||||
@Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the outputted covariates table recalibration file")
|
||||
/**
|
||||
* This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference,
|
||||
* so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites.
|
||||
*/
|
||||
@Input(fullName="knownSites", shortName = "knownSites", doc="A database of known polymorphic sites to skip over in the recalibration algorithm", required=false)
|
||||
public List<RodBinding<Feature>> knownSites = Collections.emptyList();
|
||||
@Output
|
||||
PrintStream out;
|
||||
@Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the output covariates table recalibration file")
|
||||
@Gather(CountCovariatesGatherer.class)
|
||||
public PrintStream RECAL_FILE;
|
||||
|
||||
|
|
@ -124,8 +122,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
/////////////////////////////
|
||||
private final RecalDataManager dataManager = new RecalDataManager(); // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // A list to hold the covariate objects that were requested
|
||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||
private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen)
|
||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||
private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen)
|
||||
|
||||
public static class CountedData {
|
||||
private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file
|
||||
|
|
@ -136,7 +134,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
private long dbSNPCountsMM = 0, dbSNPCountsBases = 0; // mismatch/base counts for dbSNP loci
|
||||
private long novelCountsMM = 0, novelCountsBases = 0; // mismatch/base counts for non-dbSNP loci
|
||||
private int lociSinceLastDbsnpCheck = 0; // loci since last dbsnp validation
|
||||
private int lociSinceLastDbsnpCheck = 0; // loci since last dbsnp validation
|
||||
|
||||
/**
|
||||
* Adds the values of other to this, returning this
|
||||
|
|
@ -190,20 +188,8 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
}
|
||||
|
||||
// Warn the user if no dbSNP file or other variant mask was specified
|
||||
boolean foundDBSNP = false;
|
||||
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
|
||||
if( rod != null ) {
|
||||
if( rod.getType().equals(DbSNPCodec.class) ||
|
||||
rod.getType().equals(VCFCodec.class) ||
|
||||
rod.getType().equals(VCF3Codec.class) ||
|
||||
rod.getType().equals(BEDCodec.class) ) {
|
||||
foundDBSNP = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if( !foundDBSNP && !RUN_WITHOUT_DBSNP ) {
|
||||
throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a dbSNP ROD or a VCF file containing known sites of genetic variation.");
|
||||
if( knownSites.isEmpty() && !RUN_WITHOUT_DBSNP ) {
|
||||
throw new UserException.CommandLineException("This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.");
|
||||
}
|
||||
|
||||
// Initialize the requested covariates by parsing the -cov argument
|
||||
|
|
@ -266,12 +252,6 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
logger.info( "\t" + cov.getClass().getSimpleName() );
|
||||
cov.initialize( RAC ); // Initialize any covariate member variables using the shared argument collection
|
||||
}
|
||||
|
||||
// try {
|
||||
// stream = new PrintStream( RAC.RECAL_FILE );
|
||||
// } catch ( FileNotFoundException e ) {
|
||||
// throw new RuntimeException( "Couldn't open output file: ", e );
|
||||
// }
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -290,16 +270,13 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
* @return Returns 1, but this value isn't used in the reduce step
|
||||
*/
|
||||
public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
// If any ROD covers this site then we assume it is a site of known genetic variation and we skip it
|
||||
boolean isSNP = tracker.getNTracksWithBoundFeatures() > 0;
|
||||
|
||||
// Only use data from non-dbsnp sites
|
||||
// Assume every mismatch at a non-dbsnp site is indicative of poor quality
|
||||
CountedData counter = new CountedData();
|
||||
if( !isSNP ) {
|
||||
if( tracker.getValues(knownSites).size() == 0 ) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed
|
||||
// For each read at this locus
|
||||
for( PileupElement p : context.getBasePileup() ) {
|
||||
GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead();
|
||||
for( final PileupElement p : context.getBasePileup() ) {
|
||||
final GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead();
|
||||
int offset = p.getOffset();
|
||||
|
||||
if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) {
|
||||
|
|
@ -358,8 +335,6 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
return counter;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Update the mismatch / total_base counts for a given class of loci.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -93,8 +93,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
@Output(doc="The output BAM file", required=true)
|
||||
private StingSAMFileWriter OUTPUT_BAM = null;
|
||||
@Argument(fullName="preserve_qscores_less_than", shortName="pQ",
|
||||
doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
|
||||
@Argument(fullName="preserve_qscores_less_than", shortName="pQ", doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
|
||||
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
||||
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1")
|
||||
private int SMOOTHING = 1;
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -B:dbsnp,vcf " + b36dbSNP129 +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" )
|
||||
|
|
@ -135,7 +135,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -standard" +
|
||||
" -OQ" +
|
||||
" -recalFile %s" +
|
||||
" -B:dbsnp,vcf " + b36dbSNP129,
|
||||
" -knownSites " + b36dbSNP129,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testCountCovariatesUseOriginalQuals", spec);
|
||||
|
|
@ -182,7 +182,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -B:dbsnp,vcf " + b36dbSNP129 +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -standard" +
|
||||
|
|
@ -225,30 +225,6 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesVCF() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16");
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -B:dbsnp,VCF3 " + validationDataLocation + "vcfexample3.vcf" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -standard" +
|
||||
" --solid_recal_mode SET_Q_ZERO" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testCountCovariatesVCF", spec);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesBED() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
|
|
@ -260,7 +236,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -B:bed,bed " + validationDataLocation + "recalibrationTest.bed" +
|
||||
" -knownSites:bed " + validationDataLocation + "recalibrationTest.bed" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
|
|
@ -284,10 +260,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" +
|
||||
" -knownSites:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -B:dbsnp,vcf " + b36dbSNP129 +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -cov ReadGroupCovariate" +
|
||||
" -cov QualityScoreCovariate" +
|
||||
|
|
@ -312,7 +288,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -B:dbsnp,vcf " + b36dbSNP129 +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -cov ReadGroupCovariate" +
|
||||
|
|
|
|||
Loading…
Reference in New Issue