Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
6f8e7692d4
|
|
@ -241,6 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
samplesList.addAll( samples );
|
||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
library("ggplot2")
|
||||
library("tools") #For compactPDF in R 2.13+
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
data <- read.csv(args[1])
|
||||
|
|
|
|||
|
|
@ -797,6 +797,14 @@ public class GenomeAnalysisEngine {
|
|||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
|
||||
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
|
||||
|
||||
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
|
||||
|
||||
if (argCollection.keepProgramRecords)
|
||||
removeProgramRecords = false;
|
||||
|
||||
return new SAMDataSource(
|
||||
samReaderIDs,
|
||||
threadAllocation,
|
||||
|
|
@ -813,7 +821,8 @@ public class GenomeAnalysisEngine {
|
|||
getWalkerBAQQualityMode(),
|
||||
refReader,
|
||||
getBaseRecalibration(),
|
||||
argCollection.defaultBaseQualities);
|
||||
argCollection.defaultBaseQualities,
|
||||
removeProgramRecords);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -249,6 +249,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
||||
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
|
||||
public boolean removeProgramRecords = false;
|
||||
|
||||
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
|
||||
public boolean keepProgramRecords = false;
|
||||
|
||||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
|
||||
public ValidationExclusion.TYPE unsafe;
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,11 @@ public class SAMDataSource {
|
|||
*/
|
||||
private final SAMFileReader.ValidationStringency validationStringency;
|
||||
|
||||
/**
|
||||
* Do we want to remove the program records from this data source?
|
||||
*/
|
||||
private final boolean removeProgramRecords;
|
||||
|
||||
/**
|
||||
* Store BAM indices for each reader present.
|
||||
*/
|
||||
|
|
@ -200,7 +205,8 @@ public class SAMDataSource {
|
|||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1);
|
||||
(byte) -1,
|
||||
false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -233,7 +239,8 @@ public class SAMDataSource {
|
|||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
BaseRecalibration bqsrApplier,
|
||||
byte defaultBaseQualities) {
|
||||
byte defaultBaseQualities,
|
||||
boolean removeProgramRecords) {
|
||||
this.readMetrics = new ReadMetrics();
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
|
||||
|
|
@ -249,6 +256,7 @@ public class SAMDataSource {
|
|||
dispatcher = null;
|
||||
|
||||
validationStringency = strictness;
|
||||
this.removeProgramRecords = removeProgramRecords;
|
||||
if(readBufferSize != null)
|
||||
ReadShard.setReadBufferSize(readBufferSize);
|
||||
else {
|
||||
|
|
@ -748,7 +756,7 @@ public class SAMDataSource {
|
|||
private synchronized void createNewResource() {
|
||||
if(allResources.size() > maxEntries)
|
||||
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
|
||||
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
|
||||
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
|
||||
allResources.add(readers);
|
||||
availableResources.add(readers);
|
||||
}
|
||||
|
|
@ -777,9 +785,11 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Derive a new set of readers from the Reads metadata.
|
||||
* @param readerIDs reads to load.
|
||||
* TODO: validationStringency is not used here
|
||||
* @param validationStringency validation stringency.
|
||||
* @param removeProgramRecords indicate whether to clear program records from the readers
|
||||
*/
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
|
||||
final int totalNumberOfFiles = readerIDs.size();
|
||||
int readerNumber = 1;
|
||||
final SimpleTimer timer = new SimpleTimer().start();
|
||||
|
|
@ -790,6 +800,9 @@ public class SAMDataSource {
|
|||
long lastTick = timer.currentTime();
|
||||
for(final SAMReaderID readerID: readerIDs) {
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID).call();
|
||||
if (removeProgramRecords) {
|
||||
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
||||
}
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,9 +89,9 @@ public class GATKReport {
|
|||
reader = new BufferedReader(new FileReader(file));
|
||||
reportHeader = reader.readLine();
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Could not open file : " + file);
|
||||
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
|
||||
} catch (IOException e) {
|
||||
throw new ReviewedStingException("Could not read file : " + file);
|
||||
throw new UserException.CouldNotReadInputFile(file, e);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import java.util.List;
|
|||
@PartitionBy(PartitionType.READ)
|
||||
@ActiveRegionExtension(extension=50,maxRegion=1500)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
|
||||
@RemoveProgramRecords
|
||||
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
|
||||
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
@RemoveProgramRecords
|
||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 8/2/12
|
||||
* Time: 1:58 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* Indicates that program records should be removed from SAM headers by default for this walker
|
||||
*/
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface RemoveProgramRecords {
|
||||
}
|
||||
|
|
@ -62,7 +62,7 @@ import java.util.ArrayList;
|
|||
* This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating
|
||||
* only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative
|
||||
* of poor base quality. This walker generates tables based on various user-specified covariates (such as read group,
|
||||
* reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical
|
||||
* reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical
|
||||
* probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations.
|
||||
* The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score).
|
||||
* <p>
|
||||
|
|
@ -94,7 +94,7 @@ import java.util.ArrayList;
|
|||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx4g -jar GenomeAnalysisTK.jar \
|
||||
* -T BaseQualityScoreRecalibrator \
|
||||
* -T BaseRecalibrator \
|
||||
* -I my_reads.bam \
|
||||
* -R resources/Homo_sapiens_assembly18.fasta \
|
||||
* -knownSites bundle/hg18/dbsnp_132.hg18.vcf \
|
||||
|
|
|
|||
|
|
@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
return true;
|
||||
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
|
||||
// first subset to the samples
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, true);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
|
||||
// now check to see (using EXACT model) whether this should be variant
|
||||
// do we want to apply a prior? maybe user-spec?
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
|
|||
if ( samples == null || samples.isEmpty() )
|
||||
return true;
|
||||
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, false);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
if ( subContext.isPolymorphicInSamples() ) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -500,7 +500,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
|
||||
@Requires({"eval != null", "comp != null"})
|
||||
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
|
||||
// find all of the matching comps
|
||||
if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
|
||||
// if either of these are NO_VARIATION they are LENIENT matches
|
||||
return EvalCompMatchType.LENIENT;
|
||||
|
||||
if ( comp.getType() != eval.getType() )
|
||||
return EvalCompMatchType.NO_MATCH;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
}
|
||||
}
|
||||
|
||||
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc1 != null) updateTiTv(vc1, false);
|
||||
if (vc2 != null) updateTiTv(vc2, true);
|
||||
@Override
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (eval != null)
|
||||
updateTiTv(eval, false);
|
||||
if (comp != null)
|
||||
updateTiTv(comp, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
|||
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
|
||||
for ( final VariantContext c : knownComps ) {
|
||||
// loop over sites, looking for something that matches the type eval
|
||||
if ( eval.getType() == c.getType() ) {
|
||||
if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
|
||||
return KNOWN_STATES;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -197,7 +197,9 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
|
||||
// if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
|
||||
final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
|
||||
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
|
||||
}
|
||||
|
||||
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
|
||||
|
|
@ -262,12 +264,8 @@ public class VariantEvalUtils {
|
|||
// First, filter the VariantContext to represent only the samples for evaluation
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
if (subsetBySample && vc.hasGenotypes()) {
|
||||
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
else
|
||||
vcsub = ensureAnnotations(vc, vc);
|
||||
}
|
||||
if (subsetBySample && vc.hasGenotypes())
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
|
||||
if ((byFilter || !vcsub.isFiltered())) {
|
||||
addMapping(mapping, VariantEval.getAllSampleName(), vcsub);
|
||||
|
|
|
|||
|
|
@ -58,14 +58,11 @@ import java.util.*;
|
|||
* to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
|
||||
* slightly lower quality level.
|
||||
*
|
||||
* <p>
|
||||
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The input raw variants to be recalibrated.
|
||||
* <p>
|
||||
* The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
|
||||
* The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
|
||||
* <p>
|
||||
* The tranches file that was generated by the VariantRecalibrator walker.
|
||||
*
|
||||
|
|
@ -82,6 +79,7 @@ import java.util.*;
|
|||
* --ts_filter_level 99.0 \
|
||||
* -tranchesFile path/to/output.tranches \
|
||||
* -recalFile path/to/output.recal \
|
||||
* -mode SNP \
|
||||
* -o path/to/output.recalibrated.filtered.vcf
|
||||
* </pre>
|
||||
*
|
||||
|
|
@ -198,7 +196,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
|
||||
for( final VariantContext vc : VCs ) {
|
||||
|
||||
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
|
||||
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
|
||||
if( recalDatum == null ) {
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -273,11 +274,37 @@ public class VariantDataManager {
|
|||
}
|
||||
|
||||
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
|
||||
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
|
||||
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
|
||||
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
|
||||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
|
||||
}
|
||||
|
||||
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
|
||||
switch( trainVC.getType() ) {
|
||||
case SNP:
|
||||
case MNP:
|
||||
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP );
|
||||
case INDEL:
|
||||
case MIXED:
|
||||
case SYMBOLIC:
|
||||
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) {
|
||||
switch( mode ) {
|
||||
case SNP:
|
||||
return evalVC.isSNP() || evalVC.isMNP();
|
||||
case INDEL:
|
||||
return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
|
||||
case BOTH:
|
||||
return true;
|
||||
default:
|
||||
throw new ReviewedStingException( "Encountered unknown recal mode: " + mode );
|
||||
}
|
||||
}
|
||||
|
||||
public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) {
|
||||
// we need to sort in coordinate order in order to produce a valid VCF
|
||||
Collections.sort( data, new Comparator<VariantDatum>() {
|
||||
|
|
|
|||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
|
|
@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -65,7 +63,7 @@ import java.util.*;
|
|||
* The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set.
|
||||
* One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call.
|
||||
* The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship
|
||||
* between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic
|
||||
* between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the the probability that a SNP is a true genetic
|
||||
* variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided
|
||||
* as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive
|
||||
* error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the
|
||||
|
|
@ -73,15 +71,9 @@ import java.util.*;
|
|||
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
|
||||
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
|
||||
*
|
||||
* <p>
|
||||
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The input raw variants to be recalibrated.
|
||||
|
|
@ -90,7 +82,7 @@ import java.util.*;
|
|||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
|
||||
* A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
|
||||
* <p>
|
||||
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
|
||||
*
|
||||
|
|
@ -102,8 +94,9 @@ import java.util.*;
|
|||
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
|
||||
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
|
||||
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
|
||||
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
|
||||
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
|
||||
* -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
|
||||
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
|
||||
* -mode SNP \
|
||||
* -recalFile path/to/output.recal \
|
||||
* -tranchesFile path/to/output.tranches \
|
||||
* -rscriptFile path/to/output.plots.R
|
||||
|
|
@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Advanced
|
||||
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
|
||||
protected Boolean TRUST_ALL_POLYMORPHIC = false;
|
||||
//@Hidden
|
||||
//@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
|
||||
//protected Boolean PERFORM_PROJECT_CONSENSUS = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -255,7 +245,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
|
||||
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
|
||||
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
|
||||
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
|
||||
if( VariantDataManager.checkVariationClass( vc, VRAC.MODE ) ) {
|
||||
final VariantDatum datum = new VariantDatum();
|
||||
|
||||
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
|
||||
|
|
@ -268,10 +258,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
||||
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
|
||||
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
|
||||
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
||||
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
|
||||
//}
|
||||
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
|
||||
|
||||
mapList.add( datum );
|
||||
|
|
@ -282,12 +268,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
return mapList;
|
||||
}
|
||||
|
||||
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
|
||||
return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
|
||||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && (vc.isIndel() || vc.isMixed()));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// reduce
|
||||
|
|
|
|||
|
|
@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||
private final static boolean FORBID_SYMBOLICS = false;
|
||||
|
||||
private final static int ALLOWED_MAJOR_VERSION = 2;
|
||||
private final static int MIN_MINOR_VERSION = 1;
|
||||
|
||||
private BCFVersion bcfVersion = null;
|
||||
|
||||
private VCFHeader header = null;
|
||||
|
||||
/**
|
||||
|
|
@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
|
||||
try {
|
||||
// note that this reads the magic as well, and so does double duty
|
||||
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
||||
error("Input stream does not begin with BCF2 magic");
|
||||
bcfVersion = BCFVersion.readBCFVersion(inputStream);
|
||||
if ( bcfVersion == null )
|
||||
error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
|
||||
|
||||
if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
|
||||
error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
|
||||
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
|
||||
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
|
||||
|
||||
logger.info("BCF version " + bcfVersion);
|
||||
|
||||
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
||||
|
||||
|
|
@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
FileInputStream fis = null;
|
||||
try {
|
||||
fis = new FileInputStream(path);
|
||||
return BCF2Utils.startsWithBCF2Magic(fis);
|
||||
final BCFVersion version = BCFVersion.readBCFVersion(fis);
|
||||
return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
|
||||
} catch ( FileNotFoundException e ) {
|
||||
return false;
|
||||
} catch ( IOException e ) {
|
||||
|
|
@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
try {
|
||||
if ( fis != null ) fis.close();
|
||||
} catch ( IOException e ) {
|
||||
; // do nothing
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -202,7 +202,7 @@ public final class BCF2Decoder {
|
|||
return null;
|
||||
else {
|
||||
final String s = new String(bytes, 0, goodLength);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
|
||||
}
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("readByte failure", e);
|
||||
|
|
|
|||
|
|
@ -41,8 +41,6 @@ import java.util.*;
|
|||
* @since 5/12
|
||||
*/
|
||||
public final class BCF2Utils {
|
||||
public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
|
||||
|
||||
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
|
||||
|
||||
public static final int OVERFLOW_ELEMENT_MARKER = 15;
|
||||
|
|
@ -75,68 +73,60 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires("header != null")
|
||||
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
public static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final ArrayList<String> dict = new ArrayList<String>();
|
||||
|
||||
boolean sawPASS = false;
|
||||
// special case the special PASS field which doesn't show up in the FILTER field definitions
|
||||
seen.add(VCFConstants.PASSES_FILTERS_v4);
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4);
|
||||
|
||||
// set up the strings dictionary
|
||||
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
|
||||
if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
|
||||
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
if ( ! seen.contains(idLine.getID())) {
|
||||
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
|
||||
dict.add(idLine.getID());
|
||||
seen.add(idLine.getID());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if ( ! sawPASS )
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
@Requires({"nElements >= 0", "type != null"})
|
||||
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
||||
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
||||
return typeByte;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeSize(final byte typeDescriptor) {
|
||||
public static int decodeSize(final byte typeDescriptor) {
|
||||
return (0xF0 & typeDescriptor) >> 4;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final static int decodeTypeID(final byte typeDescriptor) {
|
||||
public static int decodeTypeID(final byte typeDescriptor) {
|
||||
return typeDescriptor & 0x0F;
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public final static BCF2Type decodeType(final byte typeDescriptor) {
|
||||
public static BCF2Type decodeType(final byte typeDescriptor) {
|
||||
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
|
||||
}
|
||||
|
||||
public final static boolean sizeIsOverflow(final byte typeDescriptor) {
|
||||
public static boolean sizeIsOverflow(final byte typeDescriptor) {
|
||||
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
||||
}
|
||||
|
||||
@Requires("nElements >= 0")
|
||||
public final static boolean willOverflow(final long nElements) {
|
||||
public static boolean willOverflow(final long nElements) {
|
||||
return nElements > MAX_INLINE_ELEMENTS;
|
||||
}
|
||||
|
||||
public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
|
||||
final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
|
||||
stream.read(magicBytes);
|
||||
return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
|
||||
}
|
||||
|
||||
public final static byte readByte(final InputStream stream) {
|
||||
public static byte readByte(final InputStream stream) {
|
||||
// TODO -- shouldn't be capturing error here
|
||||
try {
|
||||
return (byte)(stream.read() & 0xFF);
|
||||
|
|
@ -155,7 +145,7 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires({"strings != null", "strings.size() > 1"})
|
||||
@Ensures("result != null")
|
||||
public static final String collapseStringList(final List<String> strings) {
|
||||
public static String collapseStringList(final List<String> strings) {
|
||||
final StringBuilder b = new StringBuilder();
|
||||
for ( final String s : strings ) {
|
||||
if ( s != null ) {
|
||||
|
|
@ -177,14 +167,14 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
|
||||
@Ensures("result != null")
|
||||
public static final List<String> exploreStringList(final String collapsed) {
|
||||
public static List<String> explodeStringList(final String collapsed) {
|
||||
assert isCollapsedString(collapsed);
|
||||
final String[] exploded = collapsed.substring(1).split(",");
|
||||
return Arrays.asList(exploded);
|
||||
}
|
||||
|
||||
@Requires("s != null")
|
||||
public static final boolean isCollapsedString(final String s) {
|
||||
public static boolean isCollapsedString(final String s) {
|
||||
return s.charAt(0) == ',';
|
||||
}
|
||||
|
||||
|
|
@ -226,7 +216,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final int value) {
|
||||
public static BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
|
|
@ -236,7 +226,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||
public static BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
|
|
@ -262,7 +252,7 @@ public final class BCF2Utils {
|
|||
*/
|
||||
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
switch ( t1 ) {
|
||||
case INT8: return t2;
|
||||
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
|
||||
|
|
@ -272,7 +262,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
public static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
|
|
@ -297,7 +287,7 @@ public final class BCF2Utils {
|
|||
* @param o
|
||||
* @return
|
||||
*/
|
||||
public final static List<Object> toList(final Object o) {
|
||||
public static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
|
|
@ -305,7 +295,7 @@ public final class BCF2Utils {
|
|||
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
public static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
return (byte)(readByte(stream));
|
||||
|
|
@ -323,7 +313,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
switch ( type.getSizeInBytes() ) {
|
||||
case 1:
|
||||
encodeStream.write(0xFF & value);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,80 @@
|
|||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Simple holder for BCF version information
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/2/12
|
||||
* Time: 2:16 PM
|
||||
*/
|
||||
public class BCFVersion {
|
||||
/**
|
||||
* BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2)
|
||||
* and m is the minor version, currently 1
|
||||
*/
|
||||
public static final byte[] MAGIC_HEADER_START = "BCF".getBytes();
|
||||
|
||||
final int majorVersion;
|
||||
final int minorVersion;
|
||||
|
||||
public BCFVersion(int majorVersion, int minorVersion) {
|
||||
this.majorVersion = majorVersion;
|
||||
this.minorVersion = minorVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the major version number of this BCF file
|
||||
*/
|
||||
public int getMajorVersion() {
|
||||
return majorVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the minor version number of this BCF file
|
||||
*/
|
||||
public int getMinorVersion() {
|
||||
return minorVersion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a new BCFVersion object describing the major and minor version of the BCF file in stream
|
||||
*
|
||||
* Note that stream must be at the very start of the file.
|
||||
*
|
||||
* @param stream
|
||||
* @return a BCFVersion object, or null if stream doesn't contain a BCF file
|
||||
* @throws IOException
|
||||
*/
|
||||
public static BCFVersion readBCFVersion(final InputStream stream) throws IOException {
|
||||
final byte[] magicBytes = new byte[MAGIC_HEADER_START.length];
|
||||
stream.read(magicBytes);
|
||||
if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) {
|
||||
// we're a BCF file
|
||||
final int majorByte = stream.read();
|
||||
final int minorByte = stream.read();
|
||||
return new BCFVersion( majorByte, minorByte );
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions
|
||||
* @param out
|
||||
* @throws IOException
|
||||
*/
|
||||
public void write(final OutputStream out) throws IOException {
|
||||
out.write(MAGIC_HEADER_START);
|
||||
out.write(getMajorVersion() & 0xFF);
|
||||
out.write(getMinorVersion() & 0xFF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion());
|
||||
}
|
||||
}
|
||||
|
|
@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* in this VC is returned as the set of alleles in the subContext, even if
|
||||
* some of those alleles aren't in the samples
|
||||
*
|
||||
* WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING?
|
||||
*
|
||||
* @param sampleNames the sample names
|
||||
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
|
||||
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default
|
||||
* @return new VariantContext subsetting to just the given samples
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
|
||||
return this; // fast path when you don't have any work to do
|
||||
} else {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
|
|
@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true
|
||||
*
|
||||
* @param sampleNames
|
||||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromSamples(final Set<String> sampleNames) {
|
||||
return subContextFromSamples(sampleNames, true);
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSample(String sampleName) {
|
||||
return subContextFromSamples(Collections.singleton(sampleName), true);
|
||||
return subContextFromSamples(Collections.singleton(sampleName));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -83,6 +84,9 @@ import java.util.*;
|
|||
* @since 06/12
|
||||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
public static final int MAJOR_VERSION = 2;
|
||||
public static final int MINOR_VERSION = 1;
|
||||
|
||||
/**
|
||||
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
|
||||
* is found in the input VC. This can be very dangerous as the genotype encoding
|
||||
|
|
@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
writer.close();
|
||||
|
||||
final byte[] headerBytes = capture.toByteArray();
|
||||
outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
|
||||
new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
|
||||
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
|
||||
outputStream.write(headerBytes);
|
||||
} catch (IOException e) {
|
||||
|
|
|
|||
|
|
@ -24,9 +24,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
import static org.testng.Assert.fail;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
|
|
@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.AfterMethod;
|
||||
|
|
@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest {
|
|||
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
|
||||
}
|
||||
}
|
||||
|
||||
/** Test that we clear program records when requested */
|
||||
@Test
|
||||
public void testRemoveProgramRecords() {
|
||||
logger.warn("Executing testRemoveProgramRecords");
|
||||
|
||||
// setup the data
|
||||
readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));
|
||||
|
||||
// use defaults
|
||||
SAMDataSource data = new SAMDataSource(readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
false);
|
||||
|
||||
List<SAMProgramRecord> defaultProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");
|
||||
|
||||
boolean removeProgramRecords = false;
|
||||
data = new SAMDataSource(readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
|
||||
List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");
|
||||
|
||||
removeProgramRecords = true;
|
||||
data = new SAMDataSource(readers,
|
||||
new ThreadAllocation(),
|
||||
null,
|
||||
genomeLocParser,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
null,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
new ArrayList<ReadFilter>(),
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null, // no BAQ
|
||||
null, // no BQSR
|
||||
(byte) -1,
|
||||
removeProgramRecords);
|
||||
|
||||
List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
|
||||
assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
|
||||
public class VariantEvalIntegrationTest extends WalkerTest {
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
|
||||
private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
|
||||
|
|
@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
|
||||
Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
|
||||
}
|
||||
|
|
@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
|
||||
Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"62f81e7d2082fbc71cae0101c27fefad", // tranches
|
||||
"b9709e4180e56abc691b208bd3e8626c", // recal file
|
||||
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
|
||||
"f360ce3eb2b0b887301be917a9843e2b", // tranches
|
||||
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
|
||||
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
|
|||
|
|
@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
|
||||
final String collapsed = BCF2Utils.collapseStringList(strings);
|
||||
Assert.assertEquals(collapsed, expected);
|
||||
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
|
||||
Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab"));
|
||||
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
String testVCF = privateTestDir + "ex2.vcf";
|
||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f"));
|
||||
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
public void run(final VariantContext vc) {
|
||||
if ( samples == null )
|
||||
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
VariantContext sub = vc.subContextFromSamples(samples, true);
|
||||
VariantContext sub = vc.subContextFromSamples(samples);
|
||||
sub.getNSamples();
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ class DataProcessingPipelineTest {
|
|||
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf",
|
||||
" -test ",
|
||||
" -p " + projectName).mkString
|
||||
spec.fileMD5s += testOut -> "021db3a52ee40d9193895213aaa05dd5"
|
||||
spec.fileMD5s += testOut -> "60d39ae909fdd049920b54e0965b6d3c"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +60,7 @@ class DataProcessingPipelineTest {
|
|||
" -bwa /home/unix/carneiro/bin/bwa",
|
||||
" -bwape ",
|
||||
" -p " + projectName).mkString
|
||||
spec.fileMD5s += testOut -> "94e32ee974aa15d05ee7d3e99f2f8c8f"
|
||||
spec.fileMD5s += testOut -> "61ca3237afdfabf78ee27a5bb80dae59"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest {
|
|||
" -blasr ",
|
||||
" -test ",
|
||||
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString
|
||||
spec.fileMD5s += testOut -> "8b5f9e8e1ba5bd686e3084eaeea23889"
|
||||
spec.fileMD5s += testOut -> "61b06e8b78a93e6644657e6d38851084"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue