Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-08-03 12:24:37 -04:00
commit 6f8e7692d4
34 changed files with 346 additions and 111 deletions

View File

@ -241,6 +241,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; // the GLmodel isn't used by the HaplotypeCaller but it is dangerous to let the user change this argument
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC.clone(), logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling

View File

@ -1,4 +1,5 @@
# Load plotting and PDF-compaction dependencies
library("ggplot2")
library("tools") #For compactPDF in R 2.13+
# First positional command-line argument is the path to the input CSV
args <- commandArgs(TRUE)
data <- read.csv(args[1])

View File

@ -797,6 +797,14 @@ public class GenomeAnalysisEngine {
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
boolean removeProgramRecords = argCollection.removeProgramRecords || walker.getClass().isAnnotationPresent(RemoveProgramRecords.class);
if (argCollection.keepProgramRecords)
removeProgramRecords = false;
return new SAMDataSource(
samReaderIDs,
threadAllocation,
@ -813,7 +821,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(),
refReader,
getBaseRecalibration(),
argCollection.defaultBaseQualities);
argCollection.defaultBaseQualities,
removeProgramRecords);
}
/**

View File

@ -249,6 +249,12 @@ public class GATKArgumentCollection {
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we override the Walker's default and remove program records from the SAM header", required = false)
public boolean removeProgramRecords = false;
@Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false)
public boolean keepProgramRecords = false;
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;

View File

@ -89,6 +89,11 @@ public class SAMDataSource {
*/
private final SAMFileReader.ValidationStringency validationStringency;
/**
* Do we want to remove the program records from this data source?
*/
private final boolean removeProgramRecords;
/**
* Store BAM indices for each reader present.
*/
@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
null, // no BQSR
(byte) -1);
(byte) -1,
false);
}
/**
@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier,
byte defaultBaseQualities) {
byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null;
validationStringency = strictness;
this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize);
else {
@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers);
availableResources.add(readers);
}
@ -777,9 +785,11 @@ public class SAMDataSource {
/**
* Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load.
* TODO: validationStringency is not used here
* @param validationStringency validation stringency.
* @param removeProgramRecords indicate whether to clear program records from the readers
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}

View File

@ -89,9 +89,9 @@ public class GATKReport {
reader = new BufferedReader(new FileReader(file));
reportHeader = reader.readLine();
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Could not open file : " + file);
throw new UserException.CouldNotReadInputFile(file, "it does not exist");
} catch (IOException e) {
throw new ReviewedStingException("Could not read file : " + file);
throw new UserException.CouldNotReadInputFile(file, e);
}

View File

@ -32,6 +32,7 @@ import java.util.List;
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
@RemoveProgramRecords
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
@Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false)

View File

@ -19,6 +19,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@RemoveProgramRecords
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 8/2/12
* Time: 1:58 PM
* To change this template use File | Settings | File Templates.
*/
import java.lang.annotation.*;
/**
* Indicates that program records should be removed from SAM headers by default for this walker
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface RemoveProgramRecords {
}

View File

@ -62,7 +62,7 @@ import java.util.ArrayList;
* This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating
* only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative
* of poor base quality. This walker generates tables based on various user-specified covariates (such as read group,
* reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical
* reported quality score, cycle, and context). Since there is a large amount of data one can then calculate an empirical
* probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations.
* The output file is a table (of the several covariate values, num observations, num mismatches, empirical quality score).
* <p>
@ -94,7 +94,7 @@ import java.util.ArrayList;
* <h2>Examples</h2>
* <pre>
* java -Xmx4g -jar GenomeAnalysisTK.jar \
* -T BaseQualityScoreRecalibrator \
* -T BaseRecalibrator \
* -I my_reads.bam \
* -R resources/Homo_sapiens_assembly18.fasta \
* -knownSites bundle/hg18/dbsnp_132.hg18.vcf \

View File

@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
// for ( String sample : samplesToPhase )
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
// logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);

View File

@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
return true;
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
// first subset to the samples
VariantContext subContext = vc.subContextFromSamples(samples, true);
VariantContext subContext = vc.subContextFromSamples(samples);
// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?

View File

@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
if ( samples == null || samples.isEmpty() )
return true;
VariantContext subContext = vc.subContextFromSamples(samples, false);
VariantContext subContext = vc.subContextFromSamples(samples);
if ( subContext.isPolymorphicInSamples() ) {
return true;
}

View File

@ -500,7 +500,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
@Requires({"eval != null", "comp != null"})
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
// find all of the matching comps
if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
// if either of these are NO_VARIATION they are LENIENT matches
return EvalCompMatchType.LENIENT;
if ( comp.getType() != eval.getType() )
return EvalCompMatchType.NO_MATCH;

View File

@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
}
}
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc1 != null) updateTiTv(vc1, false);
if (vc2 != null) updateTiTv(vc2, true);
/**
 * Folds a matched eval/comp pair into the Ti/Tv tallies; either context may be null.
 */
@Override
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    // eval sites update the eval tallies, comp sites the comp tallies (flag = true)
    if (eval != null) { updateTiTv(eval, false); }
    if (comp != null) { updateTiTv(comp, true); }
}
@Override

View File

@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
for ( final VariantContext c : knownComps ) {
// loop over sites, looking for something that matches the type eval
if ( eval.getType() == c.getType() ) {
if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
return KNOWN_STATES;
}
}

View File

@ -197,7 +197,9 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
// if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
}
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
@ -262,12 +264,8 @@ public class VariantEvalUtils {
// First, filter the VariantContext to represent only the samples for evaluation
VariantContext vcsub = vc;
if (subsetBySample && vc.hasGenotypes()) {
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
else
vcsub = ensureAnnotations(vc, vc);
}
if (subsetBySample && vc.hasGenotypes())
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
if ((byFilter || !vcsub.isFiltered())) {
addMapping(mapping, VariantEval.getAllSampleName(), vcsub);

View File

@ -58,14 +58,11 @@ import java.util.*;
* to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
* slightly lower quality level.
*
* <p>
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
*
* <h2>Input</h2>
* <p>
* The input raw variants to be recalibrated.
* <p>
* The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
* The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
* <p>
* The tranches file that was generated by the VariantRecalibrator walker.
*
@ -82,6 +79,7 @@ import java.util.*;
* --ts_filter_level 99.0 \
* -tranchesFile path/to/output.tranches \
* -recalFile path/to/output.recal \
* -mode SNP \
* -o path/to/output.recalibrated.filtered.vcf
* </pre>
*
@ -198,7 +196,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
for( final VariantContext vc : VCs ) {
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
if( VariantDataManager.checkVariationClass( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
final VariantContext recalDatum = getMatchingRecalVC(vc, recals);
if( recalDatum == null ) {

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -273,11 +274,37 @@ public class VariantDataManager {
}
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
}
/**
 * Decide whether an eval variant matches the variation class of a training variant:
 * SNP/MNP training records are compared in SNP mode, INDEL/MIXED/SYMBOLIC in INDEL
 * mode, and any other training type never matches.
 */
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) {
    final VariantContext.Type trainType = trainVC.getType();
    if ( trainType == VariantContext.Type.SNP || trainType == VariantContext.Type.MNP ) {
        return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.SNP );
    }
    if ( trainType == VariantContext.Type.INDEL || trainType == VariantContext.Type.MIXED || trainType == VariantContext.Type.SYMBOLIC ) {
        return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
    }
    return false;
}
/**
 * Check that the eval variant belongs to the variation class selected by the
 * recalibration mode. BOTH accepts every variant; an unrecognized mode is a
 * programming error.
 */
protected static boolean checkVariationClass( final VariantContext evalVC, final VariantRecalibratorArgumentCollection.Mode mode ) {
    if ( mode == VariantRecalibratorArgumentCollection.Mode.BOTH ) {
        return true;
    }
    if ( mode == VariantRecalibratorArgumentCollection.Mode.SNP ) {
        return evalVC.isSNP() || evalVC.isMNP();
    }
    if ( mode == VariantRecalibratorArgumentCollection.Mode.INDEL ) {
        return evalVC.isIndel() || evalVC.isMixed() || evalVC.isSymbolic();
    }
    throw new ReviewedStingException( "Encountered unknown recal mode: " + mode );
}
public void writeOutRecalibrationTable( final VariantContextWriter recalWriter ) {
// we need to sort in coordinate order in order to produce a valid VCF
Collections.sort( data, new Comparator<VariantDatum>() {

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.FileNotFoundException;
@ -65,7 +63,7 @@ import java.util.*;
* The purpose of the variant recalibrator is to assign a well-calibrated probability to each variant call in a call set.
* One can then create highly accurate call sets by filtering based on this single estimate for the accuracy of each call.
* The approach taken by variant quality score recalibration is to develop a continuous, covarying estimate of the relationship
* between SNP call annotations (QD, SB, HaplotypeScore, HRun, for example) and the the probability that a SNP is a true genetic
 * between SNP call annotations (QD, MQ, HaplotypeScore, and ReadPosRankSum, for example) and the probability that a SNP is a true genetic
* variant versus a sequencing or data processing artifact. This model is determined adaptively based on "true sites" provided
* as input, typically HapMap 3 sites and those sites found to be polymorphic on the Omni 2.5M SNP chip array. This adaptive
* error model can then be applied to both known and novel variation discovered in the call set of interest to evaluate the
@ -73,15 +71,9 @@ import java.util.*;
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
*
* <p>
* NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
*
* <p>
* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
*
* <p>
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
*
* <h2>Input</h2>
* <p>
* The input raw variants to be recalibrated.
@ -90,7 +82,7 @@ import java.util.*;
*
* <h2>Output</h2>
* <p>
* A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
* A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
* <p>
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
*
@ -102,8 +94,9 @@ import java.util.*;
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
* -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
* -mode SNP \
* -recalFile path/to/output.recal \
* -tranchesFile path/to/output.tranches \
* -rscriptFile path/to/output.plots.R
@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Advanced
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
protected Boolean TRUST_ALL_POLYMORPHIC = false;
//@Hidden
//@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
//protected Boolean PERFORM_PROJECT_CONSENSUS = false;
/////////////////////////////
// Private Member Variables
@ -255,7 +245,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
if( VariantDataManager.checkVariationClass( vc, VRAC.MODE ) ) {
final VariantDatum datum = new VariantDatum();
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
@ -268,10 +258,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
double priorFactor = QualityUtils.qualToProb( datum.prior );
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
//}
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
mapList.add( datum );
@ -282,12 +268,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
return mapList;
}
public static boolean checkRecalibrationMode( final VariantContext vc, final VariantRecalibratorArgumentCollection.Mode mode ) {
return mode == VariantRecalibratorArgumentCollection.Mode.BOTH ||
(mode == VariantRecalibratorArgumentCollection.Mode.SNP && vc.isSNP()) ||
(mode == VariantRecalibratorArgumentCollection.Mode.INDEL && (vc.isIndel() || vc.isMixed()));
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce

View File

@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
private final static boolean FORBID_SYMBOLICS = false;
private final static int ALLOWED_MAJOR_VERSION = 2;
private final static int MIN_MINOR_VERSION = 1;
private BCFVersion bcfVersion = null;
private VCFHeader header = null;
/**
@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
try {
// note that this reads the magic as well, and so does double duty
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
error("Input stream does not begin with BCF2 magic");
bcfVersion = BCFVersion.readBCFVersion(inputStream);
if ( bcfVersion == null )
error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
logger.info("BCF version " + bcfVersion);
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
FileInputStream fis = null;
try {
fis = new FileInputStream(path);
return BCF2Utils.startsWithBCF2Magic(fis);
final BCFVersion version = BCFVersion.readBCFVersion(fis);
return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
} catch ( FileNotFoundException e ) {
return false;
} catch ( IOException e ) {
@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
try {
if ( fis != null ) fis.close();
} catch ( IOException e ) {
; // do nothing
// do nothing
}
}
}

View File

@ -202,7 +202,7 @@ public final class BCF2Decoder {
return null;
else {
final String s = new String(bytes, 0, goodLength);
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.explodeStringList(s) : s;
}
} catch ( IOException e ) {
throw new ReviewedStingException("readByte failure", e);

View File

@ -41,8 +41,6 @@ import java.util.*;
* @since 5/12
*/
public final class BCF2Utils {
public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
public static final int OVERFLOW_ELEMENT_MARKER = 15;
@ -75,68 +73,60 @@ public final class BCF2Utils {
*/
@Requires("header != null")
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
public static ArrayList<String> makeDictionary(final VCFHeader header) {
final Set<String> seen = new HashSet<String>();
final ArrayList<String> dict = new ArrayList<String>();
boolean sawPASS = false;
// special case the special PASS field which doesn't show up in the FILTER field definitions
seen.add(VCFConstants.PASSES_FILTERS_v4);
dict.add(VCFConstants.PASSES_FILTERS_v4);
// set up the strings dictionary
for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) {
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
if ( ! seen.contains(idLine.getID())) {
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
dict.add(idLine.getID());
seen.add(idLine.getID());
}
}
}
if ( ! sawPASS )
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
return dict;
}
@Requires({"nElements >= 0", "type != null"})
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
return typeByte;
}
@Ensures("result >= 0")
public final static int decodeSize(final byte typeDescriptor) {
public static int decodeSize(final byte typeDescriptor) {
return (0xF0 & typeDescriptor) >> 4;
}
@Ensures("result >= 0")
public final static int decodeTypeID(final byte typeDescriptor) {
public static int decodeTypeID(final byte typeDescriptor) {
return typeDescriptor & 0x0F;
}
@Ensures("result != null")
public final static BCF2Type decodeType(final byte typeDescriptor) {
public static BCF2Type decodeType(final byte typeDescriptor) {
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
}
public final static boolean sizeIsOverflow(final byte typeDescriptor) {
public static boolean sizeIsOverflow(final byte typeDescriptor) {
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
}
@Requires("nElements >= 0")
public final static boolean willOverflow(final long nElements) {
public static boolean willOverflow(final long nElements) {
return nElements > MAX_INLINE_ELEMENTS;
}
public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
stream.read(magicBytes);
return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
}
public final static byte readByte(final InputStream stream) {
public static byte readByte(final InputStream stream) {
// TODO -- shouldn't be capturing error here
try {
return (byte)(stream.read() & 0xFF);
@ -155,7 +145,7 @@ public final class BCF2Utils {
*/
@Requires({"strings != null", "strings.size() > 1"})
@Ensures("result != null")
public static final String collapseStringList(final List<String> strings) {
public static String collapseStringList(final List<String> strings) {
final StringBuilder b = new StringBuilder();
for ( final String s : strings ) {
if ( s != null ) {
@ -177,14 +167,14 @@ public final class BCF2Utils {
*/
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
@Ensures("result != null")
public static final List<String> exploreStringList(final String collapsed) {
public static List<String> explodeStringList(final String collapsed) {
assert isCollapsedString(collapsed);
final String[] exploded = collapsed.substring(1).split(",");
return Arrays.asList(exploded);
}
@Requires("s != null")
public static final boolean isCollapsedString(final String s) {
public static boolean isCollapsedString(final String s) {
return s.charAt(0) == ',';
}
@ -226,7 +216,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
public final static BCF2Type determineIntegerType(final int value) {
public static BCF2Type determineIntegerType(final int value) {
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
if ( potentialType.withinRange(value) )
return potentialType;
@ -236,7 +226,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
public final static BCF2Type determineIntegerType(final int[] values) {
public static BCF2Type determineIntegerType(final int[] values) {
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
BCF2Type maxType = BCF2Type.INT8;
for ( final int value : values ) {
@ -262,7 +252,7 @@ public final class BCF2Utils {
*/
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
@Ensures("result.isIntegerType()")
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
switch ( t1 ) {
case INT8: return t2;
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
@ -272,7 +262,7 @@ public final class BCF2Utils {
}
@Ensures("result.isIntegerType()")
public final static BCF2Type determineIntegerType(final List<Integer> values) {
public static BCF2Type determineIntegerType(final List<Integer> values) {
BCF2Type maxType = BCF2Type.INT8;
for ( final int value : values ) {
final BCF2Type type1 = determineIntegerType(value);
@ -297,7 +287,7 @@ public final class BCF2Utils {
* @param o
* @return
*/
public final static List<Object> toList(final Object o) {
public static List<Object> toList(final Object o) {
if ( o == null ) return Collections.emptyList();
else if ( o instanceof List ) return (List<Object>)o;
else return Collections.singletonList(o);
@ -305,7 +295,7 @@ public final class BCF2Utils {
@Requires({"stream != null", "bytesForEachInt > 0"})
public final static int readInt(int bytesForEachInt, final InputStream stream) {
public static int readInt(int bytesForEachInt, final InputStream stream) {
switch ( bytesForEachInt ) {
case 1: {
return (byte)(readByte(stream));
@ -323,7 +313,7 @@ public final class BCF2Utils {
}
}
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
switch ( type.getSizeInBytes() ) {
case 1:
encodeStream.write(0xFF & value);

View File

@ -0,0 +1,80 @@
package org.broadinstitute.sting.utils.codecs.bcf2;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
/**
 * Simple holder for BCF version information
 *
 * User: depristo
 * Date: 8/2/12
 * Time: 2:16 PM
 */
public class BCFVersion {
    /**
     * BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2)
     * and m is the minor version, currently 1
     */
    public static final byte[] MAGIC_HEADER_START = "BCF".getBytes();

    final int majorVersion;
    final int minorVersion;

    /**
     * Create a BCFVersion with the given major and minor version numbers
     *
     * @param majorVersion the major version (e.g., 2 for BCF2)
     * @param minorVersion the minor version (e.g., 1)
     */
    public BCFVersion(int majorVersion, int minorVersion) {
        this.majorVersion = majorVersion;
        this.minorVersion = minorVersion;
    }

    /**
     * @return the major version number of this BCF file
     */
    public int getMajorVersion() {
        return majorVersion;
    }

    /**
     * @return the minor version number of this BCF file
     */
    public int getMinorVersion() {
        return minorVersion;
    }

    /**
     * Return a new BCFVersion object describing the major and minor version of the BCF file in stream
     *
     * Note that stream must be at the very start of the file.
     *
     * @param stream an input stream positioned at the first byte of a candidate BCF file
     * @return a BCFVersion object, or null if stream doesn't contain a BCF file
     * @throws IOException if an I/O error occurs while reading from stream
     */
    public static BCFVersion readBCFVersion(final InputStream stream) throws IOException {
        final byte[] magicBytes = new byte[MAGIC_HEADER_START.length];

        // InputStream.read(byte[]) may return fewer bytes than requested; loop
        // until the magic buffer is full. Previously the return value was
        // silently ignored, so a short read could cause a spurious mismatch
        // against leftover zero bytes in magicBytes.
        int nRead = 0;
        while ( nRead < magicBytes.length ) {
            final int result = stream.read(magicBytes, nRead, magicBytes.length - nRead);
            if ( result == -1 )
                return null; // hit EOF before the full magic => not a BCF file
            nRead += result;
        }

        if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) {
            // we're a BCF file
            final int majorByte = stream.read();
            final int minorByte = stream.read();
            if ( majorByte == -1 || minorByte == -1 )
                return null; // truncated header: magic present but version bytes missing
            return new BCFVersion( majorByte, minorByte );
        } else
            return null;
    }

    /**
     * Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions
     *
     * @param out the output stream to receive the magic header followed by the major and minor version bytes
     * @throws IOException if an I/O error occurs while writing to out
     */
    public void write(final OutputStream out) throws IOException {
        out.write(MAGIC_HEADER_START);
        // each version number is encoded as a single byte
        out.write(getMajorVersion() & 0xFF);
        out.write(getMinorVersion() & 0xFF);
    }

    @Override
    public String toString() {
        return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion());
    }
}

View File

@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
* in this VC is returned as the set of alleles in the subContext, even if
* some of those alleles aren't in the samples
*
* WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING!
*
* @param sampleNames the sample names
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
* @param rederiveAllelesFromGenotypes if true, restricts the returned alleles to just those in use by the given samples (this should normally be true)
* @return new VariantContext subsetting to just the given samples
*/
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( sampleNames.containsAll(getSampleNames()) ) {
if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
return this; // fast path when you don't have any work to do
} else {
VariantContextBuilder builder = new VariantContextBuilder(this);
@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
}
/**
 * Convenience overload of {@link #subContextFromSamples(java.util.Set, boolean)}
 * that calls it with rederiveAllelesFromGenotypes = true, so the returned
 * context's alleles are restricted to those actually present in the kept samples.
 *
 * @param sampleNames the names of the samples to retain in the sub context
 * @return a new VariantContext subsetting to just the given samples
 */
public VariantContext subContextFromSamples(final Set<String> sampleNames) {
    return subContextFromSamples(sampleNames, true);
}
public VariantContext subContextFromSample(String sampleName) {
return subContextFromSamples(Collections.singleton(sampleName), true);
return subContextFromSamples(Collections.singleton(sampleName));
}
/**

View File

@ -31,6 +31,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -83,6 +84,9 @@ import java.util.*;
* @since 06/12
*/
class BCF2Writer extends IndexingVariantContextWriter {
public static final int MAJOR_VERSION = 2;
public static final int MINOR_VERSION = 1;
/**
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
* is found in the input VC. This can be very dangerous as the genotype encoding
@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
writer.close();
final byte[] headerBytes = capture.toByteArray();
outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
outputStream.write(headerBytes);
} catch (IOException e) {

View File

@ -24,9 +24,12 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMProgramRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.AfterMethod;
@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest {
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
}
}
/** Test that we clear program records when requested */
@Test
public void testRemoveProgramRecords() {
    logger.warn("Executing testRemoveProgramRecords");

    // setup the data: a single known-good b37 BAM as input
    readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags()));

    // use defaults
    // Baseline: the short-form constructor. The input BAM is expected to carry
    // @PG records, which this construction should leave in the merged header.
    SAMDataSource data = new SAMDataSource(readers,
            new ThreadAllocation(),
            null,
            genomeLocParser,
            false,
            SAMFileReader.ValidationStringency.SILENT,
            null,
            null,
            new ValidationExclusion(),
            new ArrayList<ReadFilter>(),
            false);
    List<SAMProgramRecord> defaultProgramRecords = data.getHeader().getProgramRecords();
    assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");

    // With removeProgramRecords = false the long-form constructor must yield
    // the same program records as the baseline above.
    // NOTE(review): the long positional argument list mirrors the full
    // SAMDataSource constructor; verify order against its declaration if it changes.
    boolean removeProgramRecords = false;
    data = new SAMDataSource(readers,
            new ThreadAllocation(),
            null,
            genomeLocParser,
            false,
            SAMFileReader.ValidationStringency.SILENT,
            null,
            null,
            new ValidationExclusion(),
            new ArrayList<ReadFilter>(),
            false,
            BAQ.CalculationMode.OFF,
            BAQ.QualityMode.DONT_MODIFY,
            null, // no BAQ
            null, // no BQSR
            (byte) -1,
            removeProgramRecords);
    List<SAMProgramRecord> dontRemoveProgramRecords = data.getHeader().getProgramRecords();
    assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");

    // With removeProgramRecords = true the header must come back with no @PG records at all.
    removeProgramRecords = true;
    data = new SAMDataSource(readers,
            new ThreadAllocation(),
            null,
            genomeLocParser,
            false,
            SAMFileReader.ValidationStringency.SILENT,
            null,
            null,
            new ValidationExclusion(),
            new ArrayList<ReadFilter>(),
            false,
            BAQ.CalculationMode.OFF,
            BAQ.QualityMode.DONT_MODIFY,
            null, // no BAQ
            null, // no BQSR
            (byte) -1,
            removeProgramRecords);
    List<SAMProgramRecord> doRemoveProgramRecords = data.getHeader().getProgramRecords();
    assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
}
}

View File

@ -34,7 +34,7 @@ import java.util.Arrays;
import java.util.List;
public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}

View File

@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
}
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"62f81e7d2082fbc71cae0101c27fefad", // tranches
"b9709e4180e56abc691b208bd3e8626c", // recal file
"75c178345f70ca2eb90205662fbdf968"); // cut VCF
"f360ce3eb2b0b887301be917a9843e2b", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {

View File

@ -351,7 +351,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
final String collapsed = BCF2Utils.collapseStringList(strings);
Assert.assertEquals(collapsed, expected);
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
Assert.assertEquals(BCF2Utils.explodeStringList(collapsed), strings);
}
// -----------------------------------------------------------------

View File

@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest {
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab"));
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
}
@ -77,7 +77,7 @@ public class VCFIntegrationTest extends WalkerTest {
String testVCF = privateTestDir + "ex2.vcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("a04a0fc22fedb516c663e56e51fc1e27"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("e8f721ce81e4fdadba13c5291027057f"));
executeTest("Test writing samtools WEx BCF example", spec1);
}

View File

@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
public void run(final VariantContext vc) {
if ( samples == null )
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
VariantContext sub = vc.subContextFromSamples(samples, true);
VariantContext sub = vc.subContextFromSamples(samples);
sub.getNSamples();
}
};

View File

@ -41,7 +41,7 @@ class DataProcessingPipelineTest {
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf",
" -test ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "021db3a52ee40d9193895213aaa05dd5"
spec.fileMD5s += testOut -> "60d39ae909fdd049920b54e0965b6d3c"
PipelineTest.executeTest(spec)
}
@ -60,7 +60,7 @@ class DataProcessingPipelineTest {
" -bwa /home/unix/carneiro/bin/bwa",
" -bwape ",
" -p " + projectName).mkString
spec.fileMD5s += testOut -> "94e32ee974aa15d05ee7d3e99f2f8c8f"
spec.fileMD5s += testOut -> "61ca3237afdfabf78ee27a5bb80dae59"
PipelineTest.executeTest(spec)
}

View File

@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest {
" -blasr ",
" -test ",
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString
spec.fileMD5s += testOut -> "8b5f9e8e1ba5bd686e3084eaeea23889"
spec.fileMD5s += testOut -> "61b06e8b78a93e6644657e6d38851084"
PipelineTest.executeTest(spec)
}
}