Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-06-22 10:42:49 -04:00
commit 0650b349d7
283 changed files with 1520 additions and 1675080 deletions

View File

@ -59,7 +59,7 @@
<!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
<dependency org="org.apache.commons" name="commons-jexl" rev="2.1.1"/>
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
<dependency org="commons-io" name="commons-io" rev="2.1"/>

View File

@ -347,6 +347,9 @@ public class GATKArgumentCollection {
public boolean USE_SLOW_GENOTYPES = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
@Argument(fullName="allowMissingVCFHeaders",shortName = "allowMissingVCFHeaders",doc="If provided, the GATK will write out VCF files that contain INFO, FILTER, and FORMAT fields not found in the VCF header",required=false)
public boolean allowMissingVCFHeaders = false;
/**
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other

View File

@ -74,7 +74,8 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
else if ( stub.getOutputStream() != null ) {
this.file = null;
this.stream = stub.getOutputStream();
writer = VariantContextWriterFactory.create(stream, stub.getMasterSequenceDictionary(), stub.getWriterOptions(false));
writer = VariantContextWriterFactory.create(stream,
stub.getMasterSequenceDictionary(), stub.getWriterOptions(false));
}
else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");

View File

@ -183,6 +183,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
List<Options> options = new ArrayList<Options>();
if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
if ( engine.getArguments().allowMissingVCFHeaders ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);

View File

@ -55,9 +55,10 @@ import java.util.*;
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
public static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
public static final VCFInfoHeaderLine[] descriptions = {
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY),
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY),
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) };
private Set<String> founderIds = new HashSet<String>();

View File

@ -8,8 +8,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -68,5 +68,7 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
}
}

View File

@ -6,10 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -136,11 +133,6 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
public List<VCFFormatHeaderLine> getDescriptions() {
return Arrays.asList(
new VCFFormatHeaderLine(
getKeyNames().get(0),
VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Integer,
"Allelic depths for the ref and alt alleles in the order listed"));
return Arrays.asList(VCFStandardHeaderLines.getFormatLine(getKeyNames().get(0)));
}
}

View File

@ -7,8 +7,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -44,5 +44,7 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads")); }
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
}
}

View File

@ -63,7 +63,9 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
public List<String> getKeyNames() { return Arrays.asList("QD"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )

View File

@ -10,8 +10,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -85,5 +85,7 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); }
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
}
}

View File

@ -68,9 +68,10 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
}
public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
public static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(STR_PRESENT, 1, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
public static final VCFInfoHeaderLine[] descriptions = {
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
public List<String> getKeyNames() {
return Arrays.asList(keyNames);

View File

@ -41,8 +41,8 @@ import java.util.*;
public class VariantAnnotatorEngine {
private List<InfoFieldAnnotation> requestedInfoAnnotations;
private List<GenotypeAnnotation> requestedGenotypeAnnotations;
private List<InfoFieldAnnotation> requestedInfoAnnotations = Collections.emptyList();
private List<GenotypeAnnotation> requestedGenotypeAnnotations = Collections.emptyList();
private List<VAExpression> requestedExpressions = new ArrayList<VAExpression>();
private final HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
@ -164,8 +164,12 @@ public class VariantAnnotatorEngine {
descriptions.addAll(annotation.getDescriptions());
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations )
descriptions.addAll(annotation.getDescriptions());
for ( String db : dbAnnotations.values() )
descriptions.add(new VCFInfoHeaderLine(db, 0, VCFHeaderLineType.Flag, (db.equals(VCFConstants.DBSNP_KEY) ? "dbSNP" : db) + " Membership"));
for ( String db : dbAnnotations.values() ) {
if ( VCFStandardHeaderLines.getInfoLine(db, false) != null )
descriptions.add(VCFStandardHeaderLines.getInfoLine(db));
else
descriptions.add(new VCFInfoHeaderLine(db, 0, VCFHeaderLineType.Flag, db + " Membership"));
}
return descriptions;
}
@ -203,8 +207,9 @@ public class VariantAnnotatorEngine {
// go through all the requested info annotationTypes
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
Map<String, Object> annotationsFromCurrentType = ((ActiveRegionBasedAnnotation)annotationType).annotate(stratifiedContexts, vc);
if ( annotationsFromCurrentType != null )
if ( annotationsFromCurrentType != null ) {
infoAnnotations.putAll(annotationsFromCurrentType);
}
}
// generate a new annotated VC
@ -216,11 +221,11 @@ public class VariantAnnotatorEngine {
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
// put the DB key into the INFO field
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
// add the ID if appropriate
if ( rsID != null ) {
// put the DB key into the INFO field
infoAnnotations.put(VCFConstants.DBSNP_KEY, true);
if ( vc.emptyID() ) {
vc = new VariantContextBuilder(vc).id(rsID).make();
} else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
@ -236,7 +241,8 @@ public class VariantAnnotatorEngine {
break;
}
}
infoAnnotations.put(dbSet.getValue(), overlapsComp);
if ( overlapsComp )
infoAnnotations.put(dbSet.getValue(), overlapsComp);
}
}

View File

@ -128,13 +128,13 @@ class ThresHolder {
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
// INFO fields for overall data
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
headerLines.add(new VCFInfoHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
// FORMAT fields for each genotype
// todo -- find the appropriate VCF constants
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFFormatHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));

View File

@ -65,6 +65,7 @@ public class VCFDiffableReader implements DiffableReader {
br.close();
// must be read as state is stored in reader itself
AbstractVCFCodec.disableOnTheFlyModifications();
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
VCFHeader header = (VCFHeader)reader.getHeader();
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
@ -97,7 +98,9 @@ public class VCFDiffableReader implements DiffableReader {
vcRoot.add("REF", vc.getReference());
vcRoot.add("ALT", vc.getAlternateAlleles());
vcRoot.add("QUAL", vc.hasLog10PError() ? vc.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4);
vcRoot.add("FILTER", vc.getFilters());
vcRoot.add("FILTER", ! vc.filtersWereApplied() // needs null to differentiate between PASS and .
? VCFConstants.MISSING_VALUE_v4
: ( vc.getFilters().isEmpty() ? VCFConstants.PASSES_FILTERS_v4 : vc.getFilters()) );
// add info fields
for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {

View File

@ -56,9 +56,10 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model {
SNP,
INDEL,
BOTH,
POOLSNP,
POOLINDEL,
BOTH
POOLBOTH
}
public enum GENOTYPING_MODE {

View File

@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, VariantContextUtils.DEFAULT_PLOIDY);
// initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
Set<VCFHeaderLine> headerInfo = getHeaderInfo(UAC, annotationEngine, dbsnp);
// invoke initialize() method on each of the annotation classes, allowing them to add their own header lines
// and perform any necessary initialization/validation steps
@ -250,49 +250,45 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
writer.writeHeader(new VCFHeader(headerInfo, samples));
}
private Set<VCFHeaderLine> getHeaderInfo() {
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
final VariantAnnotatorEngine annotationEngine,
final DbsnpArgumentCollection dbsnp) {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// all annotation fields from VariantAnnotatorEngine
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
if ( annotationEngine != null )
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY);
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true,
VCFConstants.DOWNSAMPLED_KEY,
VCFConstants.MLE_ALLELE_COUNT_KEY,
VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
// also, check to see whether comp rods were included
if ( dbsnp.dbsnp.isBound() )
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership"));
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.DBSNP_KEY);
// FORMAT fields
headerInfo.addAll(getSupportedHeaderStrings());
VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
VCFConstants.GENOTYPE_KEY,
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_PL_KEY);
// FILTER fields
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
// FILTER fields are added unconditionally because it is not always certain under which
// circumstances the filters are used. For example, when emitting all sites the lowQual filter is used
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
return headerInfo;
}
/**
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
* @return a set of VCF format lines
*/
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
return result;
}
/**
* Compute at a given locus.
*

View File

@ -316,7 +316,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
// first, the basic info
headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
headerInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
// FORMAT and INFO fields
// headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());

View File

@ -198,8 +198,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Variables
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
private Set<String> sampleNamesForEvaluation = new TreeSet<String>();
private Set<String> sampleNamesForStratification = new TreeSet<String>();
private boolean isSubsettingSamples;
private Set<String> sampleNamesForEvaluation = new LinkedHashSet<String>();
private Set<String> sampleNamesForStratification = new LinkedHashSet<String>();
// important stratifications
private boolean byFilterIsEnabled = false;
@ -249,8 +250,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
// Load the sample list
sampleNamesForEvaluation.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
// Load the sample list, using an intermediate tree set to sort the samples
final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples);
sampleNamesForEvaluation.addAll(new TreeSet<String>(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)));
isSubsettingSamples = ! sampleNamesForEvaluation.containsAll(allSampleNames);
if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
sampleNamesForStratification.addAll(sampleNamesForEvaluation);
@ -571,6 +574,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
public List<RodBinding<VariantContext>> getEvals() { return evals; }
/**
 * Returns the isSubsettingSamples flag. The flag is assigned when the sample list is loaded, as
 * !sampleNamesForEvaluation.containsAll(allSampleNames).
 *
 * @return the current value of isSubsettingSamples
 */
public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }

View File

@ -28,8 +28,6 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
@ -37,13 +35,13 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Require
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.lang.reflect.Field;
import java.util.*;
public class VariantEvalUtils {
@ -199,18 +197,32 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
VariantContext vcsub = vc.subContextFromSamples(sampleNames, false);
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
}
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
final boolean isSingleton = originalAlleleCount == newAlleleCount && newAlleleCount == 1;
final boolean hasChrCountAnnotations = vcsub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
vcsub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
vcsub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);
if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
if ( ! isSingleton && hasChrCountAnnotations ) {
// nothing to update
return vcsub;
} else {
// have to do the work
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
if ( isSingleton )
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
if ( ! hasChrCountAnnotations )
VariantContextUtils.calculateChromosomeCounts(builder, true);
return builder.make();
}
VariantContextUtils.calculateChromosomeCounts(builder, true);
return builder.make();
}
/**
@ -250,8 +262,11 @@ public class VariantEvalUtils {
// First, filter the VariantContext to represent only the samples for evaluation
VariantContext vcsub = vc;
if (subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation())) {
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
if (subsetBySample && vc.hasGenotypes()) {
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
else
vcsub = ensureAnnotations(vc, vc);
}
if ((byFilter || !vcsub.isFiltered())) {

View File

@ -150,8 +150,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
addVQSRStandardHeaderLines(hInfo);
final TreeSet<String> samples = new TreeSet<String>();
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
@ -173,6 +172,12 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
vcfWriter.writeHeader(vcfHeader);
}
/**
 * Adds the VQSR-related INFO header lines to the supplied set.
 *
 * Three lines are added: the standard END line obtained from VCFStandardHeaderLines, a Float line
 * keyed by VariantRecalibrator.VQS_LOD_KEY, and a String line keyed by VariantRecalibrator.CULPRIT_KEY.
 *
 * @param hInfo the set of header lines to add to; it is modified in place
 */
public static final void addVQSRStandardHeaderLines(final Set<VCFHeaderLine> hInfo) {
// standard END line, looked up by key rather than constructed here
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
// the two VQSR-specific lines are declared inline, each with a count of 1
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
}
//---------------------------------------------------------------------------------------------------------------
//
// map

View File

@ -37,7 +37,9 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.io.Resource;
@ -229,7 +231,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
recalWriter.writeHeader( new VCFHeader() );
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
ApplyRecalibration.addVQSRStandardHeaderLines(hInfo);
recalWriter.writeHeader( new VCFHeader(hInfo) );
}
//---------------------------------------------------------------------------------------------------------------

View File

@ -129,7 +129,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
.attribute("OriginalStart", fromInterval.getStart()).make();
}
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),

View File

@ -34,9 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -202,6 +200,9 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
// Remove any excluded headers.
if (XLheaderNames != null)
selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true);
// always include the contig lines
selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary());
return selectedHeaders;
}

View File

@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.SampleUtils;
@ -427,13 +428,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
if (KEEP_ORIGINAL_CHR_COUNTS) {
headerLines.add(new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
headerLines.add(new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
headerLines.add(new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
headerLines.add(new VCFInfoHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
}
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Depth of coverage"));
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {
// It's not necessary that the user supply select names for the JEXL expressions, since those
@ -469,6 +469,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
UAC.NO_SLOD = true;
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
}
/** load in the IDs file to a hashset for matching */
@ -483,6 +484,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
throw new UserException.CouldNotReadInputFile(rsIDFile, e);
}
}
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
}
/**

View File

@ -170,8 +170,8 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
hInfo.add(new VCFInfoHeaderLine("HetPct", 1, VCFHeaderLineType.Float, "Percent of heterozygous genotypes"));
hInfo.add(new VCFInfoHeaderLine("HomVarPct", 1, VCFHeaderLineType.Float, "Percent homozygous variant genotypes"));
hInfo.add(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled Hardy-Weinberg violation p-value"));
hInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
hInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
hInfo.add(new VCFFilterHeaderLine("HardyWeinbergViolation", "The validation is in Hardy-Weinberg violation"));
hInfo.add(new VCFFilterHeaderLine("HighNoCallRate", "The validation no-call rate is too high"));
hInfo.add(new VCFFilterHeaderLine("TooManyHomVars", "The validation homozygous variant rate is too high"));

View File

@ -232,7 +232,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
firstEntry = false;
else
sb.append("\t");
sb.append(sample);
// spaces in sample names are legal but wreak havoc in R data frames
sb.append(sample.replace(" ","_"));
sb.append(".");
sb.append(gf);
}
@ -247,7 +248,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
}
for ( final String sample : samples ) {
for ( final String gf : genotypeFieldsToTake ) {
out.println(String.format("%d\t%s\t%s\t%s", nRecords, sample, gf, record.get(index++)));
out.println(String.format("%d\t%s\t%s\t%s", nRecords, sample.replace(" ","_"), gf, record.get(index++)));
}
}
}

View File

@ -219,8 +219,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getID()));
hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
for ( VCFHeaderLine field : hInfo ) {

View File

@ -150,8 +150,7 @@ public class SampleUtils {
// iterate to get all of the sample names
for ( Map.Entry<String, VCFHeader> pair : VCFUtils.getVCFHeadersFromRods(toolkit).entrySet() ) {
Set<String> vcfSamples = pair.getValue().getGenotypeSamples();
for ( String sample : vcfSamples )
for ( String sample : pair.getValue().getGenotypeSamples() )
addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, pair.getKey());
}
}

View File

@ -422,13 +422,24 @@ public class ReadClipper {
/**
* Reverts only soft clipped bases with quality score greater than or equal to minQual
*
* Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR')
* todo -- Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR') -- THIS HAS BEEN REMOVED TEMPORARILY SHOULD HAPPEN INSIDE THE CLIPPING ROUTINE!
*
* @param read the read
* @param minQual the minimum base quality score to revert the base (inclusive)
* @return the read with high quality soft clips reverted
* @return a new read with high quality soft clips reverted
*/
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) {
    // step 1: hard clip the soft clipped bases that fail the quality threshold
    final GATKSAMRecord highQualityClipsOnly = hardClipLowQualitySoftClips(read, minQual);
    // step 2: whatever soft clips are left passed the threshold, so revert all of them
    return revertSoftClippedBases(highQualityClipsOnly);
}
/**
* Hard clips away soft clipped bases that are below the given quality threshold
*
* @param read the read
* @param minQual the minimum base quality score to revert the base (inclusive)
* @return a new read without low quality soft clipped bases
*/
public static GATKSAMRecord hardClipLowQualitySoftClips(GATKSAMRecord read, byte minQual) {
int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
if (read.isEmpty() || nLeadingSoftClips > read.getReadLength())
return GATKSAMRecord.emptyRead(read);
@ -457,17 +468,12 @@ public class ReadClipper {
}
GATKSAMRecord clippedRead = read;
if (right >= 0) {
if (right + 1 < clippedRead.getReadLength())
clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the left tail
clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1)); // keep track of how may bases to 're-softclip' after processing
}
if (left >= 0) {
if (left - 1 > 0)
clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the right tail
clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left); // keep track of how may bases to 're-softclip' after processing
}
return revertSoftClippedBases(clippedRead); // now that we have only good bases in the soft clips, we can revert them all
if (right >= 0 && right + 1 < clippedRead.getReadLength()) // only clip if there are softclipped bases (right >= 0) and the first high quality soft clip is not the last base (right+1 < readlength)
clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the right tail
if (left >= 0 && left - 1 > 0) // only clip if there are softclipped bases (left >= 0) and the first high quality soft clip is not the last base (left-1 > 0)
clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the left tail
return clippedRead;
}
/**

View File

@ -36,6 +36,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -79,6 +80,14 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
*/
private BCF2GenotypeFieldDecoders gtFieldDecoders = null;
/**
* A cached array of GenotypeBuilders for efficient genotype decoding.
*
* Caching it allows us to avoid recreating this intermediate data
* structure each time we decode genotypes
*/
private GenotypeBuilder[] builders = null;
// for error handling
private int recordNo = 0;
private int pos = 0;
@ -168,6 +177,13 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
// prepare the genotype field decoders
gtFieldDecoders = new BCF2GenotypeFieldDecoders(header);
// create and initialize the genotype builder array
final int nSamples = header.getNGenotypeSamples();
builders = new GenotypeBuilder[nSamples];
for ( int i = 0; i < nSamples; i++ ) {
builders[i] = new GenotypeBuilder(header.getGenotypeSamples().get(i));
}
// position right before next line (would be right before first real record byte at end of header)
return new FeatureCodecHeader(header, inputStream.getPosition());
}
@ -256,6 +272,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
final int nFormatFields = nFormatSamples >> 24;
final int nSamples = nFormatSamples & 0x00FFFFF;
if ( header.getNGenotypeSamples() != nSamples )
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
"different numbers of samples per record. Saw " + header.getNGenotypeSamples() +
" samples in header but have a record with " + nSamples + " samples");
decodeID(builder);
final ArrayList<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
decodeFilter(builder);
@ -314,7 +335,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
*/
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
final ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
return clippedAlleles;
} else
@ -335,14 +356,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
String ref = null;
for ( int i = 0; i < nAlleles; i++ ) {
final String allele = (String)decoder.decodeTypedValue();
final String alleleBases = (String)decoder.decodeTypedValue();
if ( i == 0 ) {
ref = allele;
alleles.add(Allele.create(allele, true));
} else {
alleles.add(Allele.create(allele, false));
}
final boolean isRef = i == 0;
final Allele allele = Allele.create(alleleBases, isRef);
if ( isRef ) ref = alleleBases;
alleles.add(allele);
if ( allele.isSymbolic() )
throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
}
assert ref != null;
@ -416,11 +439,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
final VariantContextBuilder builder ) {
if (siteInfo.nSamples > 0) {
final LazyGenotypesContext.LazyParser lazyParser =
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields);
final int nGenotypes = header.getGenotypeSamples().size();
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser,
new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()),
nGenotypes);
header.getNGenotypeSamples());
// did we resort the sample names? If so, we need to load the genotype data
if ( !header.samplesWereAlreadySorted() )

View File

@ -60,7 +60,7 @@ public class BCF2GenotypeFieldDecoders {
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder());
genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
genotypeFieldDecoder.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new PLDecoder());
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_PL_KEY, new PLDecoder());
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_QUALITY_KEY, new GQDecoder());
}
@ -99,21 +99,21 @@ public class BCF2GenotypeFieldDecoders {
*/
public interface Decoder {
@Requires({"siteAlleles != null", "! siteAlleles.isEmpty()",
"field != null", "decoder != null", "gbs != null", "! gbs.isEmpty()"})
"field != null", "decoder != null", "gbs != null", "gbs.length != 0"})
public void decode(final List<Allele> siteAlleles,
final String field,
final BCF2Decoder decoder,
final byte typeDescriptor,
final List<GenotypeBuilder> gbs);
final GenotypeBuilder[] gbs);
}
private class GTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
// we have to do a bit of low-level processing here as we want to know the size upfront
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.size() >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
else {
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
@ -137,7 +137,7 @@ public class BCF2GenotypeFieldDecoders {
private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
final BCF2Decoder decoder,
final byte typeDescriptor,
final List<GenotypeBuilder> gbs) {
final GenotypeBuilder[] gbs) {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
final int nPossibleGenotypes = 3 * 3;
@ -169,6 +169,9 @@ public class BCF2GenotypeFieldDecoders {
gb.alleles(gt);
}
final boolean phased = (a1 & 0x01) == 1;
gb.phased(phased);
}
}
@ -176,7 +179,7 @@ public class BCF2GenotypeFieldDecoders {
final int ploidy,
final BCF2Decoder decoder,
final byte typeDescriptor,
final List<GenotypeBuilder> gbs) {
final GenotypeBuilder[] gbs) {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
// a single cache for the encoded genotypes, since we don't actually need this vector
@ -199,6 +202,8 @@ public class BCF2GenotypeFieldDecoders {
gt.add(getAlleleFromEncoded(siteAlleles, encode));
gb.alleles(gt);
final boolean phased = (encoded[0] & 0x01) == 1;
gb.phased(phased);
}
}
}
@ -213,7 +218,7 @@ public class BCF2GenotypeFieldDecoders {
private class DPDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.DP(decoder.decodeInt(typeDescriptor, -1));
@ -223,7 +228,7 @@ public class BCF2GenotypeFieldDecoders {
private class GQDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
@ -233,7 +238,7 @@ public class BCF2GenotypeFieldDecoders {
private class ADDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
gb.AD(decoder.decodeIntArray(typeDescriptor));
}
@ -242,7 +247,7 @@ public class BCF2GenotypeFieldDecoders {
private class PLDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
gb.PL(decoder.decodeIntArray(typeDescriptor));
}
@ -251,7 +256,7 @@ public class BCF2GenotypeFieldDecoders {
private class GenericDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor);
if ( value != null ) { // don't add missing values
@ -270,7 +275,7 @@ public class BCF2GenotypeFieldDecoders {
private class FTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor);
if ( value != null ) { // don't add missing values

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.codecs.bcf2;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -46,12 +47,16 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
private final ArrayList<Allele> siteAlleles;
private final int nSamples;
private final int nFields;
private final GenotypeBuilder[] builders;
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final ArrayList<Allele> alleles, final int nSamples, final int nFields) {
@Requires("codec.getHeader().getNGenotypeSamples() == builders.length")
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final ArrayList<Allele> alleles, final int nSamples,
final int nFields, final GenotypeBuilder[] builders) {
this.codec = codec;
this.siteAlleles = alleles;
this.nSamples = nSamples;
this.nFields = nFields;
this.builders = builders;
}
@Override
@ -62,21 +67,8 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
// load our byte[] data into the decoder
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
// TODO -- fast path for sites only
// go ahead and decode everyone
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
if ( samples.size() != nSamples )
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
"different numbers of samples per record. Saw " + samples.size() +
" samples in header but have a record with " + nSamples + " samples");
// create and initialize the genotypes array
final ArrayList<GenotypeBuilder> builders = new ArrayList<GenotypeBuilder>(nSamples);
for ( int i = 0; i < nSamples; i++ ) {
builders.add(new GenotypeBuilder(samples.get(i)));
}
for ( int i = 0; i < nSamples; i++ )
builders[i].reset(true);
for ( int i = 0; i < nFields; i++ ) {
// get the field name

View File

@ -82,18 +82,27 @@ public final class BCF2Utils {
@Requires("header != null")
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
final Set<String> dict = new TreeSet<String>();
final Set<String> seen = new HashSet<String>();
final ArrayList<String> dict = new ArrayList<String>();
boolean sawPASS = false;
// set up the strings dictionary
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
for ( VCFHeaderLine line : header.getMetaData() ) {
if ( line instanceof VCFIDHeaderLine) {
VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
dict.add(idLine.getID());
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
if ( ! seen.contains(idLine.getID())) {
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
dict.add(idLine.getID());
seen.add(idLine.getID());
}
}
}
return new ArrayList<String>(dict);
if ( ! sawPASS )
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
return dict;
}
@Requires({"nElements >= 0", "type != null"})
@ -142,25 +151,6 @@ public final class BCF2Utils {
}
}
@Requires({"stream != null", "bytesForEachInt > 0"})
public final static int readInt(int bytesForEachInt, final InputStream stream) {
switch ( bytesForEachInt ) {
case 1: {
return (byte)(readByte(stream));
} case 2: {
final int b1 = readByte(stream) & 0xFF;
final int b2 = readByte(stream) & 0xFF;
return (short)((b1 << 8) | b2);
} case 4: {
final int b1 = readByte(stream) & 0xFF;
final int b2 = readByte(stream) & 0xFF;
final int b3 = readByte(stream) & 0xFF;
final int b4 = readByte(stream) & 0xFF;
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
} default: throw new ReviewedStingException("Unexpected size during decoding");
}
}
/**
* Collapse multiple strings into a comma separated list
*
@ -174,8 +164,10 @@ public final class BCF2Utils {
public static final String collapseStringList(final List<String> strings) {
final StringBuilder b = new StringBuilder();
for ( final String s : strings ) {
assert s.indexOf(",") == -1; // no commas in individual strings
b.append(",").append(s);
if ( s != null ) {
assert s.indexOf(",") == -1; // no commas in individual strings
b.append(",").append(s);
}
}
return b.toString();
}
@ -299,20 +291,40 @@ public final class BCF2Utils {
else return Collections.singletonList(o);
}
/**
 * Reads a signed integer that occupies 1, 2 or 4 bytes in the stream.
 *
 * Bytes are consumed least-significant first: the first byte read fills the lowest
 * 8 bits of the result and each following byte is placed 8 bits higher. One- and
 * two-byte values are sign-extended to an int.
 *
 * @param bytesForEachInt the width of the encoded value in bytes; must be 1, 2 or 4
 * @param stream the stream to read the bytes from
 * @return the decoded value
 * @throws ReviewedStingException if bytesForEachInt is not 1, 2 or 4
 */
@Requires({"stream != null", "bytesForEachInt > 0"})
public final static int readInt(int bytesForEachInt, final InputStream stream) {
    // reject unsupported widths up front, before anything is consumed from the stream
    if ( bytesForEachInt != 1 && bytesForEachInt != 2 && bytesForEachInt != 4 )
        throw new ReviewedStingException("Unexpected size during decoding");

    // assemble the value one byte at a time, lowest-order byte first
    int assembled = 0;
    for ( int shift = 0; shift < 8 * bytesForEachInt; shift += 8 )
        assembled |= (readByte(stream) & 0xFF) << shift;

    // narrowing casts sign-extend the short encodings back into an int
    if ( bytesForEachInt == 1 )
        return (byte)assembled;
    if ( bytesForEachInt == 2 )
        return (short)assembled;
    return assembled;
}
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
switch ( type.getSizeInBytes() ) {
case 1:
encodeStream.write(0xFF & value);
break;
case 2:
encodeStream.write((0x00FF & value));
encodeStream.write((0xFF00 & value) >> 8);
encodeStream.write(0xFF & value);
break;
case 4:
encodeStream.write((0xFF000000 & value) >> 24);
encodeStream.write((0x00FF0000 & value) >> 16);
encodeStream.write((0x0000FF00 & value) >> 8);
encodeStream.write((0x000000FF & value));
encodeStream.write((0x0000FF00 & value) >> 8);
encodeStream.write((0x00FF0000 & value) >> 16);
encodeStream.write((0xFF000000 & value) >> 24);
break;
default:
throw new ReviewedStingException("BUG: unexpected type size " + type);

View File

@ -22,8 +22,9 @@ import java.util.zip.GZIPInputStream;
public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
protected static boolean doOnTheFlyModifications = true;
protected final static Logger log = Logger.getLogger(VCFCodec.class);
protected final static Logger log = Logger.getLogger(AbstractVCFCodec.class);
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
// we have to store the list of strings that make up the header until they're needed
@ -58,6 +59,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
protected Map<String, String> stringCache = new HashMap<String, String>();
protected boolean warnedAboutNoEqualsForNonFlag = false;
protected AbstractVCFCodec() {
super(VariantContext.class);
}
@ -168,6 +171,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
}
this.header = new VCFHeader(metaData, sampleNames);
if ( doOnTheFlyModifications )
this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
return this.header;
}
@ -344,7 +349,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
// do we have genotyping data
if (parts.length > NUM_STANDARD_FIELDS) {
final LazyGenotypesContext.LazyParser lazyParser = new LazyVCFGenotypesParser(alleles, chr, pos);
final int nGenotypes = header.getGenotypeSamples().size();
final int nGenotypes = header.getNGenotypeSamples();
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, parts[8], nGenotypes);
// did we resort the sample names? If so, we need to load the genotype data
@ -425,6 +430,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
if ( infoValueSplitSize == 1 ) {
value = infoValueArray[0];
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
if ( headerLine != null && headerLine.getType() == VCFHeaderLineType.Flag && value.equals("0") ) {
// deal with the case where a flag field has =0, such as DB=0, by skipping the add
continue;
}
} else {
ArrayList<String> valueList = new ArrayList<String>(infoValueSplitSize);
for ( int j = 0; j < infoValueSplitSize; j++ )
@ -433,7 +443,18 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
}
} else {
key = infoFieldArray[i];
value = true;
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) {
if ( ! warnedAboutNoEqualsForNonFlag ) {
log.warn("Found info key " + key + " without a = value, but the header says the field is of type "
+ headerLine.getType() + " but this construct is only value for FLAG type fields");
warnedAboutNoEqualsForNonFlag = true;
}
value = VCFConstants.MISSING_VALUE_v4;
} else {
value = true;
}
}
attributes.put(key, value);
@ -780,7 +801,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i])));
} else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
gb.AD(decodeInts(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
} else if (gtKey.equals(VCFConstants.GENOTYPE_PL_KEY)) {
gb.PL(decodeInts(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
gb.PL(GenotypeLikelihoods.fromGLField(GTValueArray[i]).getAsPLs());
@ -823,4 +844,13 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]);
return values;
}
/**
* Forces all VCFCodecs to not perform any on the fly modifications, by clearing the
* static doOnTheFlyModifications flag that is shared by every codec instance.
*
* While the flag is set (its initial value), header parsing passes the newly built
* header through VCFStandardHeaderLines.repairStandardHeaderLines; once this method
* has been called that repair step is skipped. Useful primarily for raw comparisons
* such as when comparing raw VCF records.
*
* NOTE(review): no way of switching the flag back on is visible here, so the effect
* presumably lasts for the rest of the process -- confirm before calling this from
* code that shares a JVM with other VCF readers.
*/
public static final void disableOnTheFlyModifications() {
doOnTheFlyModifications = false;
}
}

View File

@ -114,7 +114,8 @@ public class VCFCodec extends AbstractVCFCodec {
* @param headerForRepairs
*/
public void setHeaderForRepairs(final VCFHeader headerForRepairs) {
log.info("Using master VCF header to repair missing files from incoming VCFs");
if ( headerForRepairs != null )
log.info("Using master VCF header to repair missing files from incoming VCFs");
this.headerForRepairs = headerForRepairs;
}

View File

@ -24,8 +24,10 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays;
import java.util.LinkedHashMap;
@ -35,6 +37,8 @@ import java.util.Map;
* a base class for compound header lines, which include info lines and format lines (so far)
*/
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
// register the logger under this class's own name so that warnings raised while validating
// compound (INFO / FORMAT) header lines are attributed to the code that emits them
final protected static Logger logger = Logger.getLogger(VCFCompoundHeaderLine.class);
public enum SupportedHeaderLineType {
INFO(true), FORMAT(false);
@ -151,6 +155,10 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
count = Integer.valueOf(numberStr);
}
if ( count < 0 && countType == VCFHeaderLineCount.INTEGER )
throw new UserException.MalformedVCFHeader("Count < 0 for fixed size VCF header field " + name);
try {
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
} catch (Exception e) {
@ -172,6 +180,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
if ( name == null || type == null || description == null || lineType == null )
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s type=%s desc=%s lineType=%s",
super.getKey(), name, type, description, lineType ));
// Flag fields carry no value, so a declared count other than 0 is repaired to 0
if ( type == VCFHeaderLineType.Flag && count != 0 ) {
    // warn BEFORE overwriting count: the message must report the offending value that was
    // found in the header, not the 0 it is about to be replaced with
    logger.warn("FLAG fields must have a count value of 0, but saw " + count + " for header line " + getID() + ". Changing it to 0 inside the code");
    count = 0;
}
}
/**

View File

@ -44,20 +44,21 @@ public final class VCFConstants {
public static final String DOWNSAMPLED_KEY = "DS";
public static final String EXPECTED_ALLELE_COUNT_KEY = "EC";
public static final String END_KEY = "END";
public static final String GENOTYPE_FILTER_KEY = "FT";
public static final String GENOTYPE_KEY = "GT";
@Deprecated
public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; // log10 scaled genotype likelihoods
public static final String GENOTYPE_POSTERIORS_KEY = "GP";
public static final String GENOTYPE_QUALITY_KEY = "GQ";
public static final String GENOTYPE_ALLELE_DEPTHS = "AD";
public static final String GENOTYPE_PL_KEY = "PL"; // phred-scaled genotype likelihoods
@Deprecated public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; // log10 scaled genotype likelihoods
public static final String HAPMAP2_KEY = "H2";
public static final String HAPMAP3_KEY = "H3";
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
public static final String RMS_MAPPING_QUALITY_KEY = "MQ";
public static final String MAPPING_QUALITY_ZERO_KEY = "MQ0";
public static final String SAMPLE_NUMBER_KEY = "NS";
public static final String PHRED_GENOTYPE_LIKELIHOODS_KEY = "PL"; // phred-scaled genotype likelihoods
public static final String PHASE_QUALITY_KEY = "PQ";
public static final String PHASE_SET_KEY = "PS";
public static final String OLD_DEPTH_KEY = "RD";
@ -88,7 +89,8 @@ public final class VCFConstants {
public static final String FORMAT_HEADER_START = "##FORMAT";
public static final String INFO_HEADER_START = "##INFO";
public static final String ALT_HEADER_START = "##ALT";
public static final String CONTIG_HEADER_START = "##contig";
public static final String CONTIG_HEADER_KEY = "contig";
public static final String CONTIG_HEADER_START = "##" + CONTIG_HEADER_KEY;
// old indel alleles
public static final char DELETION_ALLELE_v3 = 'D';

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.codecs.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
@ -54,11 +55,12 @@ public class VCFHeader {
private final Set<VCFHeaderLine> mMetaData = new TreeSet<VCFHeaderLine>();
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new HashMap<String, VCFFilterHeaderLine>();
private final Map<String, VCFHeaderLine> mOtherMetaData = new HashMap<String, VCFHeaderLine>();
private final List<VCFContigHeaderLine> contigMetaData = new ArrayList<VCFContigHeaderLine>();
// the list of auxiliary tags
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
private final List<String> mGenotypeSampleNames = new ArrayList<String>();
// the character string that indicates meta data
public static final String METADATA_INDICATOR = "##";
@ -106,7 +108,15 @@ public class VCFHeader {
* @param genotypeSampleNames the sample names
*/
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
this(metaData, new ArrayList<String>(genotypeSampleNames));
}
public VCFHeader(Set<VCFHeaderLine> metaData, List<String> genotypeSampleNames) {
this(metaData);
if ( genotypeSampleNames.size() != new HashSet<String>(genotypeSampleNames).size() )
throw new ReviewedStingException("BUG: VCF header has duplicate sample names");
mGenotypeSampleNames.addAll(genotypeSampleNames);
samplesWereAlreadySorted = ParsingUtils.isSorted(genotypeSampleNames);
buildVCFReaderMaps(genotypeSampleNames);
@ -175,12 +185,23 @@ public class VCFHeader {
} else if ( line instanceof VCFFormatHeaderLine ) {
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
addMetaDataMapBinding(mFormatMetaData, formatLine);
} else if ( line instanceof VCFFilterHeaderLine ) {
VCFFilterHeaderLine filterLine = (VCFFilterHeaderLine)line;
mFilterMetaData.put(filterLine.getID(), filterLine);
} else if ( line instanceof VCFContigHeaderLine ) {
contigMetaData.add((VCFContigHeaderLine)line);
} else {
mOtherMetaData.put(line.getKey(), line);
}
}
if ( hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && ! hasFormatLine(VCFConstants.GENOTYPE_PL_KEY) ) {
logger.warn("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
+ VCFConstants.GENOTYPE_PL_KEY + " field. As the GATK now only manages PL fields internally"
+ " automatically adding a corresponding PL field to your VCF header");
addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
loadMetaDataMaps();
}
}
/**
@ -239,7 +260,7 @@ public class VCFHeader {
*
* @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
*/
public Set<String> getGenotypeSamples() {
public List<String> getGenotypeSamples() {
return mGenotypeSampleNames;
}
@ -294,6 +315,26 @@ public class VCFHeader {
return mFormatMetaData.get(id);
}
/**
* Looks up a FILTER header line by its ID in the filter meta data map.
*
* @param id the ID of the FILTER header line
* @return the meta data line, or null if there is none
*/
public VCFFilterHeaderLine getFilterHeaderLine(final String id) {
return mFilterMetaData.get(id);
}
/**
 * @param id the ID of the INFO header line to look for
 * @return true if getInfoHeaderLine(id) finds a line, false otherwise
 */
public boolean hasInfoLine(final String id) {
    final VCFInfoHeaderLine infoLine = getInfoHeaderLine(id);
    return infoLine != null;
}
/**
 * @param id the ID of the FORMAT header line to look for
 * @return true if getFormatHeaderLine(id) finds a line, false otherwise
 */
public boolean hasFormatLine(final String id) {
    final VCFFormatHeaderLine formatLine = getFormatHeaderLine(id);
    return formatLine != null;
}
/**
 * @param id the ID of the FILTER header line to look for
 * @return true if getFilterHeaderLine(id) finds a line, false otherwise
 */
public boolean hasFilterLine(final String id) {
    final VCFFilterHeaderLine filterLine = getFilterHeaderLine(id);
    return filterLine != null;
}
/**
* @param key the header key name
* @return the meta data line, or null if there is none

View File

@ -0,0 +1,262 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
/**
* Manages header lines for standard VCF INFO and FORMAT fields
*
* Provides simple mechanisms for registering standard lines,
* looking them up, and adding them to headers
*
* @author Mark DePristo
* @since 6/12
*/
public class VCFStandardHeaderLines {
    /**
     * Enabling this causes us to repair header lines even if only their descriptions differ
     */
    private final static boolean REPAIR_BAD_DESCRIPTIONS = false;

    protected final static Logger logger = Logger.getLogger(VCFStandardHeaderLines.class);

    // Registries of the standard lines, keyed by field ID. They are filled exactly once by the
    // static initializer below and never reassigned, hence final.
    private final static Standards<VCFFormatHeaderLine> formatStandards = new Standards<VCFFormatHeaderLine>();
    private final static Standards<VCFInfoHeaderLine> infoStandards = new Standards<VCFInfoHeaderLine>();

    /**
     * Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly
     * allocated VCFHeader with standard VCF header lines repaired as necessary
     *
     * Only FORMAT and INFO lines are candidates for repair; every other line is copied as is.
     *
     * @param header the header whose meta data lines should be checked against the standards
     * @return a new VCFHeader built from the (possibly repaired) lines and the genotype samples of header
     */
    @Requires("header != null")
    @Ensures("result != null")
    public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
        final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaData().size());
        for ( VCFHeaderLine line : header.getMetaData() ) {
            if ( line instanceof VCFFormatHeaderLine ) {
                line = formatStandards.repair((VCFFormatHeaderLine) line);
            } else if ( line instanceof VCFInfoHeaderLine) {
                line = infoStandards.repair((VCFInfoHeaderLine) line);
            }

            newLines.add(line);
        }

        return new VCFHeader(newLines, header.getGenotypeSamples());
    }

    /**
     * Adds header lines for each of the format fields in IDs to header, returning the set of
     * IDs without standard descriptions, unless throwErrorForMissing is true, in which
     * case this situation results in a ReviewedStingException
     *
     * @param headerLines the set of header lines that the standard FORMAT lines are added to
     * @param throwErrorForMissing if true, an ID without a standard line raises a ReviewedStingException
     * @param IDs the IDs of the FORMAT fields whose standard lines should be added
     * @return the IDs for which no standard FORMAT line is registered
     */
    public static Set<String> addStandardFormatLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final Collection<String> IDs) {
        return formatStandards.addToHeader(headerLines, IDs, throwErrorForMissing);
    }

    /**
     * @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection)
     *
     * @param headerLines the set of header lines that the standard FORMAT lines are added to
     * @param throwErrorForMissing if true, an ID without a standard line raises a ReviewedStingException
     * @param IDs the IDs of the FORMAT fields whose standard lines should be added
     * @return the IDs for which no standard FORMAT line is registered
     */
    public static Set<String> addStandardFormatLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final String ... IDs) {
        return addStandardFormatLines(headerLines, throwErrorForMissing, Arrays.asList(IDs));
    }

    /**
     * Returns the standard format line for ID.  If none exists, return null or throw an exception, depending
     * on throwErrorForMissing
     *
     * @param ID the ID of the FORMAT field
     * @param throwErrorForMissing if true, a missing standard line raises a ReviewedStingException
     * @return the standard FORMAT line, or null if none exists and throwErrorForMissing is false
     */
    public static VCFFormatHeaderLine getFormatLine(final String ID, final boolean throwErrorForMissing) {
        return formatStandards.get(ID, throwErrorForMissing);
    }

    /**
     * Returns the standard format line for ID.  If none exists throw an exception
     *
     * @param ID the ID of the FORMAT field
     * @return the standard FORMAT line, never null
     */
    public static VCFFormatHeaderLine getFormatLine(final String ID) {
        // delegate to the two-argument overload, mirroring getInfoLine(String)
        return getFormatLine(ID, true);
    }

    /**
     * Registers line as the standard FORMAT line for its ID
     *
     * @param line the standard FORMAT line to register
     */
    private static void registerStandard(final VCFFormatHeaderLine line) {
        formatStandards.add(line);
    }

    /**
     * Adds header lines for each of the info fields in IDs to header, returning the set of
     * IDs without standard descriptions, unless throwErrorForMissing is true, in which
     * case this situation results in a ReviewedStingException
     *
     * @param headerLines the set of header lines that the standard INFO lines are added to
     * @param throwErrorForMissing if true, an ID without a standard line raises a ReviewedStingException
     * @param IDs the IDs of the INFO fields whose standard lines should be added
     * @return the IDs for which no standard INFO line is registered
     */
    public static Set<String> addStandardInfoLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final Collection<String> IDs) {
        return infoStandards.addToHeader(headerLines, IDs, throwErrorForMissing);
    }

    /**
     * @see #addStandardInfoLines(java.util.Set, boolean, java.util.Collection)
     *
     * @param headerLines the set of header lines that the standard INFO lines are added to
     * @param throwErrorForMissing if true, an ID without a standard line raises a ReviewedStingException
     * @param IDs the IDs of the INFO fields whose standard lines should be added
     * @return the IDs for which no standard INFO line is registered
     */
    public static Set<String> addStandardInfoLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final String ... IDs) {
        return addStandardInfoLines(headerLines, throwErrorForMissing, Arrays.asList(IDs));
    }

    /**
     * Returns the standard info line for ID.  If none exists, return null or throw an exception, depending
     * on throwErrorForMissing
     *
     * @param ID the ID of the INFO field
     * @param throwErrorForMissing if true, a missing standard line raises a ReviewedStingException
     * @return the standard INFO line, or null if none exists and throwErrorForMissing is false
     */
    public static VCFInfoHeaderLine getInfoLine(final String ID, final boolean throwErrorForMissing) {
        return infoStandards.get(ID, throwErrorForMissing);
    }

    /**
     * Returns the standard info line for ID.  If none exists throw an exception
     *
     * @param ID the ID of the INFO field
     * @return the standard INFO line, never null
     */
    public static VCFInfoHeaderLine getInfoLine(final String ID) {
        return getInfoLine(ID, true);
    }

    /**
     * Registers line as the standard INFO line for its ID
     *
     * @param line the standard INFO line to register
     */
    private static void registerStandard(final VCFInfoHeaderLine line) {
        infoStandards.add(line);
    }

    //
    // VCF header line constants
    //
    static {
        // FORMAT lines
        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY,           1,                            VCFHeaderLineType.String,  "Genotype"));
        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY,   1,                            VCFHeaderLineType.Integer, "Genotype Quality"));
        registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY,              1,                            VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY,        VCFHeaderLineCount.G,         VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
        registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));

        // INFO lines
        registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY,                  1,                    VCFHeaderLineType.Integer, "Stop position of the interval"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY,     VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float,   "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY,          0,                    VCFHeaderLineType.Flag,    "Were any of the samples downsampled?"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY,                0,                    VCFHeaderLineType.Flag,    "dbSNP Membership"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY,                1,                    VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY,          1,                    VCFHeaderLineType.Float,   "Strand Bias"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY,     VCFHeaderLineCount.A, VCFHeaderLineType.Float,   "Allele Frequency, for each ALT allele, in the same order as listed"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY,         VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY,        1,                    VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1,                    VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY,  1,                    VCFHeaderLineType.Float,   "RMS Mapping Quality"));
        registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY,              0,                    VCFHeaderLineType.Flag,    "Somatic event"));
    }

    /**
     * A registry of standard header lines of a single kind (FORMAT or INFO), keyed by line ID
     */
    private static class Standards<T extends VCFCompoundHeaderLine> {
        private final Map<String, T> standards = new HashMap<String, T>();

        /**
         * Returns the standard line for the ID of line when line disagrees with it in count type,
         * count, or type (or description, if REPAIR_BAD_DESCRIPTIONS is enabled); otherwise returns
         * line itself.  Lines without a registered standard are returned unchanged.
         *
         * @param line the header line to check
         * @return either line or the registered standard line with the same ID
         */
        @Requires("line != null")
        @Ensures({"result != null", "result.getID().equals(line.getID())"})
        public T repair(final T line) {
            final T standard = get(line.getID(), false);
            if ( standard != null ) {
                final boolean badCountType = line.getCountType() != standard.getCountType();
                // counts are only comparable when both lines use the same (fixed) count type
                final boolean badCount = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount();
                final boolean badType = line.getType() != standard.getType();
                final boolean badDesc = ! line.getDescription().equals(standard.getDescription());
                final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc);

                if ( needsRepair ) {
                    logger.warn("Repairing standard header line for field " + line.getID() + " because"
                            + (badCountType ? " -- count types disagree; header has " + line.getCountType() + " but standard is " + standard.getCountType() : "")
                            + (badType ? " -- type disagree; header has " + line.getType() + " but standard is " + standard.getType() : "")
                            + (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "")
                            + (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
                    return standard;
                } else
                    return line;
            } else
                return line;
        }

        /**
         * Adds the standard line of every ID in IDs to headerLines
         *
         * @param headerLines the set of header lines to add to
         * @param IDs the IDs whose standard lines should be added
         * @param throwErrorForMissing if true, an ID without a standard line raises a ReviewedStingException
         * @return the IDs that have no registered standard line
         */
        @Requires("headerLines != null")
        @Ensures({"result != null", "result.isEmpty() || ! throwErrorForMissing", "IDs.containsAll(result)"})
        public Set<String> addToHeader(final Set<VCFHeaderLine> headerLines, final Collection<String> IDs, final boolean throwErrorForMissing) {
            final Set<String> missing = new HashSet<String>();
            for ( final String ID : IDs ) {
                final T line = get(ID, throwErrorForMissing);
                if ( line == null )
                    missing.add(ID);
                else
                    headerLines.add(line);
            }

            return missing;
        }

        /**
         * Registers line under its ID
         *
         * @param line the standard line to register
         * @throws ReviewedStingException if a line is already registered for the same ID
         */
        @Requires("line != null")
        @Ensures({"standards.containsKey(line.getID())"})
        public void add(final T line) {
            if ( standards.containsKey(line.getID()) )
                throw new ReviewedStingException("Attempting to add multiple standard header lines for ID " + line.getID());
            standards.put(line.getID(), line);
        }

        /**
         * Looks up the standard line registered for ID
         *
         * @param ID the field ID to look up
         * @param throwErrorForMissing if true, a missing line raises a ReviewedStingException instead of returning null
         * @return the registered line, or null if there is none and throwErrorForMissing is false
         */
        @Requires("ID != null")
        @Ensures({"result != null || ! throwErrorForMissing"})
        public T get(final String ID, final boolean throwErrorForMissing) {
            final T x = standards.get(ID);
            if ( throwErrorForMissing && x == null )
                throw new ReviewedStingException("Couldn't find a standard VCF header line for field " + ID);
            return x;
        }
    }
}

View File

@ -247,9 +247,13 @@ public class VCFUtils {
* @param refDict the SAM formatted reference sequence dictionary
*/
public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldHeader.getMetaData().size());
return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaData(), referenceFile, refDict), oldHeader.getGenotypeSamples());
}
for ( final VCFHeaderLine line : oldHeader.getMetaData() ) {
public final static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldLines.size());
for ( final VCFHeaderLine line : oldLines ) {
if ( line instanceof VCFContigHeaderLine )
continue; // skip old contig lines
if ( line.getKey().equals(VCFHeader.REFERENCE_KEY) )
@ -261,7 +265,7 @@ public class VCFUtils {
lines.add(contigLine);
lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + referenceFile.getAbsolutePath()));
return new VCFHeader(lines, oldHeader.getGenotypeSamples());
return lines;
}
/**

View File

@ -194,12 +194,35 @@ public class GATKSAMRecord extends BAMRecord {
}
}
/**
* @return whether or not this read has base insertion or deletion qualities (one of the two is sufficient to return true)
*/
public boolean hasBaseIndelQualities() {
return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null;
}
/**
* @return the base insertion quality or null if read doesn't have one
*/
public byte[] getExistingBaseInsertionQualities() {
return SAMUtils.fastqToPhred( getStringAttribute(BQSR_BASE_INSERTION_QUALITIES));
}
/**
* @return the base deletion quality or null if read doesn't have one
*/
public byte[] getExistingBaseDeletionQualities() {
return SAMUtils.fastqToPhred( getStringAttribute(BQSR_BASE_DELETION_QUALITIES));
}
/**
* Default utility to query the base insertion quality of a read. If the read doesn't have one, it creates an array of default qualities (currently Q45)
* and assigns it to the read.
*
* @return the base insertion quality array
*/
public byte[] getBaseInsertionQualities() {
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
byte [] quals = getExistingBaseInsertionQualities();
if( quals == null ) {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
@ -209,8 +232,14 @@ public class GATKSAMRecord extends BAMRecord {
return quals;
}
/**
* Default utility to query the base deletion quality of a read. If the read doesn't have one, it creates an array of default qualities (currently Q45)
* and assigns it to the read.
*
* @return the base deletion quality array
*/
public byte[] getBaseDeletionQualities() {
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) );
byte[] quals = getExistingBaseDeletionQualities();
if( quals == null ) {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
@ -478,6 +507,7 @@ public class GATKSAMRecord extends BAMRecord {
public Object clone() throws CloneNotSupportedException {
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
if (temporaryAttributes != null) {
clone.temporaryAttributes = new HashMap<Object, Object>();
for (Object attribute : temporaryAttributes.keySet())
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
}

View File

@ -347,6 +347,15 @@ public class Allele implements Comparable<Allele> {
*/
public String getDisplayString() { return new String(bases); }
/**
* Same as #getDisplayString() but returns the result as byte[].
*
* Slightly faster than getDisplayString(), as no new String is created.
* Note that the returned array is this allele's own bases array, not a copy,
* so callers should treat it as read-only.
*
* @return the allele string representation
*/
public byte[] getDisplayBases() { return bases; }
/**
* @param other the other allele
*

View File

@ -156,11 +156,6 @@ public final class FastGenotype extends Genotype {
return (List<String>) getExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
}
@Override
public boolean filtersWereApplied() {
return hasExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY);
}
@Override public int[] getPL() {
return PL;
}

View File

@ -31,7 +31,7 @@ public abstract class Genotype implements Comparable<Genotype> {
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_ALLELE_DEPTHS,
VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
VCFConstants.GENOTYPE_PL_KEY);
public final static String PHASED_ALLELE_SEPARATOR = "|";
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
@ -354,7 +354,7 @@ public abstract class Genotype implements Comparable<Genotype> {
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
toStringIfExists(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, getPL()),
toStringIfExists(VCFConstants.GENOTYPE_PL_KEY, getPL()),
sortedString(getExtendedAttributes()));
}
@ -451,7 +451,7 @@ public abstract class Genotype implements Comparable<Genotype> {
*
* @return
*/
@Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"})
@Ensures({"result != null"})
public abstract List<String> getFilters();
@Ensures({"result != getFilters().isEmpty()"})
@ -459,9 +459,6 @@ public abstract class Genotype implements Comparable<Genotype> {
return ! getFilters().isEmpty();
}
@Ensures("result == true || getFilters().isEmpty()")
public abstract boolean filtersWereApplied();
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
@Deprecated public double getLog10PError() { return getGQ() / -10.0; }
@Deprecated public int getPhredScaledQual() { return getGQ(); }
@ -505,7 +502,7 @@ public abstract class Genotype implements Comparable<Genotype> {
return getGQ();
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
return Arrays.asList(getAD());
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
} else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
return Arrays.asList(getPL());
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
return getDP();
@ -521,7 +518,7 @@ public abstract class Genotype implements Comparable<Genotype> {
return hasGQ();
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
return hasAD();
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
} else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
return hasPL();
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
return hasDP();

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@ -49,6 +50,7 @@ import java.util.*;
* @author Mark DePristo
* @since 06/12
*/
@Invariant({"alleles != null"})
public final class GenotypeBuilder {
public static boolean MAKE_FAST_BY_DEFAULT = true;
@ -154,9 +156,9 @@ public final class GenotypeBuilder {
* function you must provide sampleName and alleles before trying to
* make more Genotypes.
*/
public final void reset() {
sampleName = null;
alleles = null;
public final void reset(final boolean keepSampleName) {
if ( ! keepSampleName ) sampleName = null;
alleles = Collections.emptyList();
isPhased = false;
GQ = -1;
DP = -1;
@ -381,7 +383,8 @@ public final class GenotypeBuilder {
*/
@Requires("filters != null")
public GenotypeBuilder filters(final List<String> filters) {
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
if ( ! filters.isEmpty() )
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
return this;
}

View File

@ -51,7 +51,7 @@ public class SlowGenotype extends Genotype {
this.alleles = Collections.unmodifiableList(alleles);
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
if ( log10Likelihoods != null )
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
commonInfo.putAttribute(VCFConstants.GENOTYPE_PL_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
this.isPhased = isPhased;
validate();
}
@ -72,12 +72,12 @@ public class SlowGenotype extends Genotype {
// Useful methods for getting genotype likelihoods for a genotype object, if present
//
@Override public boolean hasLikelihoods() {
return (commonInfo.hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
return (commonInfo.hasAttribute(VCFConstants.GENOTYPE_PL_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_PL_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
(commonInfo.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
}
@Override public GenotypeLikelihoods getLikelihoods() {
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
GenotypeLikelihoods x = getLikelihoods(VCFConstants.GENOTYPE_PL_KEY, true);
if ( x != null )
return x;
else {
@ -113,7 +113,6 @@ public class SlowGenotype extends Genotype {
//
// ---------------------------------------------------------------------------------------------------------
@Override public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
@Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }

View File

@ -339,7 +339,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @return
*/
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) {
if ( sampleNames.containsAll(getSampleNames()) ) {
return this; // fast path when you don't have any work to do
} else {
VariantContextBuilder builder = new VariantContextBuilder(this);
@ -559,7 +559,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
public String getAlleleStringWithRefPadding(final Allele allele) {
if ( VariantContextUtils.needsPadding(this) )
return VariantContextUtils.padAllele(this, allele);
return VariantContextUtils.padAllele(this, allele).getDisplayString();
else
return allele.getDisplayString();
}
@ -1177,8 +1177,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
// if ( getType() == Type.INDEL ) {
// if ( getReference().length() != (getLocation().size()-1) ) {
long length = (stop - start) + 1;
if ( (getReference().isNull() && length != 1 ) ||
(!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) {
if ( ! isSymbolic()
&& ((getReference().isNull() && length != 1 )
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
}
@ -1358,19 +1359,37 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header) {
builder.attributes(fullyDecodeAttributes(getAttributes(), header));
builder.attributes(fullyDecodeAttributes(getAttributes(), header, false));
}
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes, final VCFHeader header) {
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes,
final VCFHeader header,
final boolean allowMissingValuesComparedToHeader) {
final Map<String, Object> newAttributes = new HashMap<String, Object>(attributes.size());
for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) {
final String field = attr.getKey();
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) )
continue; // gross, FT is part of the extended attributes
final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field);
final Object decoded = decodeValue(field, attr.getValue(), format);
if ( decoded != null )
newAttributes.put(field, decoded);
if ( decoded != null &&
! allowMissingValuesComparedToHeader
&& format.getCountType() != VCFHeaderLineCount.UNBOUNDED
&& format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements
final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1;
final int expSize = format.getCount(this.getNAlleles() - 1);
if ( obsSize != expSize ) {
throw new UserException.MalformedVCFHeader("Discordant field size detected for field " +
field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " +
"but the header says this should have " + expSize + " values based on header record " +
format);
}
}
newAttributes.put(field, decoded);
}
return newAttributes;
@ -1378,7 +1397,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
private final Object decodeValue(final String field, final Object value, final VCFCompoundHeaderLine format) {
if ( value instanceof String ) {
if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) )
if ( field.equals(VCFConstants.GENOTYPE_PL_KEY) )
return GenotypeLikelihoods.fromPLField((String)value);
final String string = (String)value;
@ -1400,6 +1419,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
} else {
return value;
}
// allowMissingValuesComparedToHeader
}
private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
@ -1409,7 +1430,12 @@ public class VariantContext implements Feature { // to enable tribble integratio
else {
switch ( format.getType() ) {
case Character: return string;
case Flag: return Boolean.valueOf(string);
case Flag:
final boolean b = Boolean.valueOf(string);
if ( b == false )
throw new UserException.MalformedVCF("VariantContext FLAG fields " + field + " cannot contain false values"
+ " as seen at " + getChr() + ":" + getStart());
return b;
case String: return string;
case Integer: return Integer.valueOf(string);
case Float: return Double.valueOf(string);
@ -1430,7 +1456,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) {
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header);
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header, true);
return new GenotypeBuilder(g).attributes(map).make();
}

View File

@ -54,6 +54,7 @@ public class VariantContextUtils {
static {
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
engine.setLenient(false);
engine.setDebug(false);
}
/**
@ -119,10 +120,6 @@ public class VariantContextUtils {
attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
}
else {
attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0);
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
}
}
return attributes;
@ -186,17 +183,23 @@ public class VariantContextUtils {
return false;
}
public static String padAllele(final VariantContext vc, final Allele allele) {
public static Allele padAllele(final VariantContext vc, final Allele allele) {
assert needsPadding(vc);
StringBuilder sb = new StringBuilder();
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
sb.append(allele.getDisplayString());
return sb.toString();
if ( allele.isSymbolic() )
return allele;
else {
// prepend the reference base to the current allele's bases and create a new padded allele
final StringBuilder sb = new StringBuilder();
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
sb.append(allele.getDisplayString());
final String newBases = sb.toString();
return Allele.create(newBases, allele.isReference());
}
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
final boolean padVC = needsPadding(inputVC);
// nothing to do if we don't need to pad bases
@ -204,46 +207,21 @@ public class VariantContextUtils {
if ( !inputVC.hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
Byte refByte = inputVC.getReferenceBaseForIndel();
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
final Map<Allele, Allele> unpaddedToPadded = new HashMap<Allele, Allele>(inputVC.getNAlleles());
List<Allele> alleles = new ArrayList<Allele>();
for (Allele a : inputVC.getAlleles()) {
// get bases for current allele and create a new one with trimmed bases
if (a.isSymbolic()) {
alleles.add(a);
} else {
String newBases;
if ( refBaseShouldBeAppliedToEndOfAlleles )
newBases = a.getBaseString() + new String(new byte[]{refByte});
else
newBases = new String(new byte[]{refByte}) + a.getBaseString();
alleles.add(Allele.create(newBases,a.isReference()));
}
for (final Allele a : inputVC.getAlleles()) {
final Allele padded = padAllele(inputVC, a);
alleles.add(padded);
unpaddedToPadded.put(a, padded);
}
// now we can recreate new genotypes with padded alleles
GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
for (final Genotype g : inputVC.getGenotypes() ) {
List<Allele> inAlleles = g.getAlleles();
List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
for (Allele a : inAlleles) {
if (a.isCalled()) {
if (a.isSymbolic()) {
newGenotypeAlleles.add(a);
} else {
String newBases;
if ( refBaseShouldBeAppliedToEndOfAlleles )
newBases = a.getBaseString() + new String(new byte[]{refByte});
else
newBases = new String(new byte[]{refByte}) + a.getBaseString();
newGenotypeAlleles.add(Allele.create(newBases,a.isReference()));
}
}
else {
// add no-call allele
newGenotypeAlleles.add(Allele.NO_CALL);
}
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
for (final Allele a : g.getAlleles()) {
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
@ -560,7 +538,7 @@ public class VariantContextUtils {
for (final VariantContext vc : prepaddedVCs) {
// also a reasonable place to remove filtered calls, if needed
if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(createVariantContextWithPaddedAlleles(vc, false));
VCs.add(createVariantContextWithPaddedAlleles(vc));
}
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null;

View File

@ -264,7 +264,9 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
// if the context is null, we need to create it to evaluate the JEXL expression
if (this.jContext == null) createContext();
try {
jexl.put (exp, (Boolean) exp.exp.evaluate(jContext));
final Boolean value = (Boolean) exp.exp.evaluate(jContext);
// treat errors as no match
jexl.put(exp, value == null ? false : value);
} catch (Exception e) {
throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage()));
}

View File

@ -260,7 +260,7 @@ public abstract class BCF2FieldEncoder {
@Requires("isDynamicallyTyped()")
@Ensures("result != null")
public BCF2Type getDynamicType(final Object value) {
throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field");
throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field " + getField());
}
// ----------------------------------------------------------------------
@ -269,21 +269,6 @@ public abstract class BCF2FieldEncoder {
//
// ----------------------------------------------------------------------
/**
* Convenience method that just called encodeValue with a no minimum for the number of values.
*
* Primarily useful for encoding site values
*
* @param encoder
* @param value
* @param type
* @throws IOException
*/
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
encodeValue(encoder, value, type, 0);
}
/**
* Key abstract method that should encode a value of the given type into the encoder.
*
@ -348,10 +333,10 @@ public abstract class BCF2FieldEncoder {
if ( value == null )
return "";
else if (value instanceof List) {
if ( ((List) value).size() == 1 )
return (String)((List) value).get(0);
else
return BCF2Utils.collapseStringList((List<String>)value);
final List<String> l = (List<String>)value;
if ( l.isEmpty() ) return "";
else if ( l.size() == 1 ) return (String)l.get(0);
else return BCF2Utils.collapseStringList(l);
} else
return (String)value;
}
@ -367,7 +352,7 @@ public abstract class BCF2FieldEncoder {
public Flag(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
super(headerLine, dict, BCF2Type.INT8);
if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
throw new ReviewedStingException("Flag encoder only suppports atomic flags for field " + getField());
}
@Override
@ -376,7 +361,7 @@ public abstract class BCF2FieldEncoder {
}
@Override
@Requires("minValues <= 1")
@Requires({"minValues <= 1", "value != null", "value instanceof Boolean", "((Boolean)value) == true"})
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
encoder.encodeRawBytes(1, getStaticType());
}
@ -409,9 +394,11 @@ public abstract class BCF2FieldEncoder {
} else {
// handle generic case
final List<Double> doubles = toList(Double.class, value);
for ( final double d : doubles ) {
encoder.encodeRawFloat(d);
count++;
for ( final Double d : doubles ) {
if ( d != null ) { // necessary because .,. => [null, null] in VC
encoder.encodeRawFloat(d);
count++;
}
}
}
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
@ -439,6 +426,7 @@ public abstract class BCF2FieldEncoder {
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((int[])value);
}
@Requires("value == null || ((int[])value).length <= minValues")
@Override
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
int count = 0;
@ -495,9 +483,11 @@ public abstract class BCF2FieldEncoder {
@Override
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
int count = 0;
for ( final int i : toList(Integer.class, value) ) {
encoder.encodeRawInt(i, type);
count++;
for ( final Integer i : toList(Integer.class, value) ) {
if ( i != null ) { // necessary because .,. => [null, null] in VC
encoder.encodeRawInt(i, type);
count++;
}
}
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
}

View File

@ -100,7 +100,7 @@ public abstract class BCF2FieldWriter {
} else {
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
encoder.encodeType(valueCount, type);
getFieldEncoder().encodeOneValue(encoder, rawValue, type);
getFieldEncoder().encodeValue(encoder, rawValue, type, valueCount);
}
}
}
@ -179,7 +179,7 @@ public abstract class BCF2FieldWriter {
final List<Integer> values = new ArrayList<Integer>(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
for ( final Object i : BCF2Utils.toList(g.getExtendedAttribute(getField(), null)) ) {
values.add((Integer)i); // we know they are all integers
if ( i != null ) values.add((Integer)i); // we know they are all integers
}
}
@ -246,6 +246,10 @@ public abstract class BCF2FieldWriter {
buildAlleleMap(vc);
nValuesPerGenotype = vc.getMaxPloidy();
// deal with the case where we have no call everywhere, in which case we write out diploid
if ( nValuesPerGenotype == -1 )
nValuesPerGenotype = 2;
super.start(encoder, vc);
}
@ -298,7 +302,6 @@ public abstract class BCF2FieldWriter {
if ( nAlleles > 2 ) {
// for multi-allelics we need to clear the map, and add additional lookups
alleleMapForTriPlus.clear();
alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup
final List<Allele> alleles = vc.getAlleles();
for ( int i = 2; i < alleles.size(); i++ ) {
alleleMapForTriPlus.put(alleles.get(i), i);

View File

@ -84,6 +84,8 @@ import java.util.*;
*/
class BCF2Writer extends IndexingVariantContextWriter {
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
final private static List<Allele> MISSING_GENOTYPE = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
private VCFHeader header;
@ -111,8 +113,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
public void writeHeader(final VCFHeader header) {
// create the contig offsets map
if ( header.getContigLines().isEmpty() ) {
logger.warn("No contig dictionary found in header, falling back to reference sequence dictionary");
createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
if ( ALLOW_MISSING_CONTIG_LINES ) {
logger.warn("No contig dictionary found in header, falling back to reference sequence dictionary");
createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
} else {
throw new UserException.MalformedBCF2("Cannot write BCF2 file with missing contig lines");
}
} else {
createContigDictionary(header.getContigLines());
}
@ -213,7 +219,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
final int nAlleles = vc.getNAlleles();
final int nInfo = vc.getAttributes().size();
final int nGenotypeFormatFields = getNGenotypeFormatFields(vc);
final int nSamples = vc.getNSamples();
final int nSamples = header.getNGenotypeSamples();
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32);
encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32);
@ -256,10 +262,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildAlleles( VariantContext vc ) throws IOException {
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
for ( final Allele allele : vc.getAlleles() ) {
byte[] s = allele.getBases();
for ( Allele allele : vc.getAlleles() ) {
if ( needsPadding )
s = VariantContextUtils.padAllele(vc,allele).getBytes();
allele = VariantContextUtils.padAllele(vc,allele);
final byte[] s = allele.getDisplayBases();
encoder.encodeTypedString(s);
}
}
@ -298,7 +304,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
Genotype g = vc.getGenotype(name);
if ( g == null )
// we don't have any data about g at all
g = new GenotypeBuilder(name).make();
g = new GenotypeBuilder(name).alleles(MISSING_GENOTYPE).make();
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);

View File

@ -44,7 +44,7 @@ class IntGenotypeFieldAccessors {
public IntGenotypeFieldAccessors() {
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new IntGenotypeFieldAccessors.ADAccessor());
intGenotypeFieldEncoders.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new IntGenotypeFieldAccessors.PLAccessor());
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_PL_KEY, new IntGenotypeFieldAccessors.PLAccessor());
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_QUALITY_KEY, new IntGenotypeFieldAccessors.GQAccessor());
}

View File

@ -33,5 +33,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
// Switches that a caller can pass to the writer factory to control how a VariantContextWriter is built.
public enum Options {
// passed to the VCFWriter constructor as its enableOnTheFlyIndexing argument
INDEX_ON_THE_FLY,
// passed to the VCFWriter constructor as its doNotWriteGenotypes argument
DO_NOT_WRITE_GENOTYPES,
// passed to the VCFWriter constructor as allowMissingFieldsInHeader; when set, INFO/FORMAT/FILTER keys
// that have no header line are written anyway instead of raising a MalformedVCFHeader error
ALLOW_MISSING_FIELDS_IN_HEADER,
// NOTE(review): presumably makes the factory produce a BCF writer instead of a VCF one; the code that
// reads this option is not visible here, so confirm against VariantContextWriterFactory
FORCE_BCF
}

View File

@ -27,9 +27,9 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.*;
@ -51,15 +51,17 @@ class VCFWriter extends IndexingVariantContextWriter {
// the VCF header we're storing
protected VCFHeader mHeader = null;
// were filters applied?
protected boolean filtersWereAppliedToContext = false;
final private boolean allowMissingFieldsInHeader;
private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict,
final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes,
final boolean allowMissingFieldsInHeader ) {
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size
this.doNotWriteGenotypes = doNotWriteGenotypes;
this.allowMissingFieldsInHeader = allowMissingFieldsInHeader;
}
// --------------------------------------------------------------------------------
@ -73,13 +75,6 @@ class VCFWriter extends IndexingVariantContextWriter {
// note we need to update the mHeader object after this call because the header
// may have genotypes trimmed out of it, if doNotWriteGenotypes is true
mHeader = writeHeader(header, mWriter, doNotWriteGenotypes, getVersionLine(), getStreamName());
// determine if we use filters, so we should FORCE pass the records
// TODO -- this might not be necessary any longer as we have unfiltered, filtered, and PASS VCs
for ( final VCFHeaderLine line : header.getMetaData() ) {
if ( line instanceof VCFFilterHeaderLine)
filtersWereAppliedToContext = true;
}
}
public static final String getVersionLine() {
@ -166,7 +161,7 @@ class VCFWriter extends IndexingVariantContextWriter {
vc = new VariantContextBuilder(vc).noGenotypes().make();
try {
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
super.add(vc);
Map<Allele, String> alleleMap = buildAlleleMap(vc);
@ -214,7 +209,7 @@ class VCFWriter extends IndexingVariantContextWriter {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// FILTER
String filters = getFilterString(vc, filtersWereAppliedToContext);
String filters = getFilterString(vc);
mWriter.write(filters);
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -222,6 +217,10 @@ class VCFWriter extends IndexingVariantContextWriter {
Map<String, String> infoFields = new TreeMap<String, String>();
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
String key = field.getKey();
if ( ! mHeader.hasInfoLine(key) )
fieldIsMissingFromHeaderError(vc, key, "INFO");
String outputValue = formatVCFField(field.getValue());
if ( outputValue != null )
infoFields.put(key, outputValue);
@ -236,6 +235,10 @@ class VCFWriter extends IndexingVariantContextWriter {
} else {
List<String> genotypeAttributeKeys = calcVCFGenotypeKeys(vc, mHeader);
if ( ! genotypeAttributeKeys.isEmpty() ) {
for ( final String format : genotypeAttributeKeys )
if ( ! mHeader.hasFormatLine(format) )
fieldIsMissingFromHeaderError(vc, format, "FORMAT");
final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -270,12 +273,18 @@ class VCFWriter extends IndexingVariantContextWriter {
//
// --------------------------------------------------------------------------------
public static final String getFilterString(final VariantContext vc) {
return getFilterString(vc, false);
}
private final String getFilterString(final VariantContext vc) {
if ( vc.isFiltered() ) {
for ( final String filter : vc.getFilters() )
if ( ! mHeader.hasFilterLine(filter) )
fieldIsMissingFromHeaderError(vc, filter, "FILTER");
public static final String getFilterString(final VariantContext vc, boolean forcePASS) {
return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()));
}
else if ( vc.filtersWereApplied() )
return VCFConstants.PASSES_FILTERS_v4;
else
return VCFConstants.UNFILTERED;
}
private static final String QUAL_FORMAT_STRING = "%.2f";
@ -330,13 +339,13 @@ class VCFWriter extends IndexingVariantContextWriter {
*/
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
throws IOException {
if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
final List<String> badSampleNames = new ArrayList<String>();
for ( final Genotype g : vc.getGenotypes() )
if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
badSampleNames.add(g.getSampleName());
throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
}
// if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
// final List<String> badSampleNames = new ArrayList<String>();
// for ( final Genotype g : vc.getGenotypes() )
// if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
// badSampleNames.add(g.getSampleName());
// throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
// }
for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -388,7 +397,7 @@ class VCFWriter extends IndexingVariantContextWriter {
// some exceptions
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
}
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
@ -524,7 +533,7 @@ class VCFWriter extends IndexingVariantContextWriter {
if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
if ( sawDP ) keys.add(VCFConstants.DEPTH_KEY);
if ( sawAD ) keys.add(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
if ( sawPL ) keys.add(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
if ( sawPL ) keys.add(VCFConstants.GENOTYPE_PL_KEY);
if ( sawGenotypeFilter ) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
@ -553,4 +562,13 @@ class VCFWriter extends IndexingVariantContextWriter {
}
return count;
}
/**
 * Reports a key that is used by a VariantContext but has no corresponding line in the VCF header.
 *
 * When this writer was constructed with allowMissingFieldsInHeader set, the call is a no-op;
 * otherwise a UserException.MalformedVCFHeader is thrown that names the key, the kind of field
 * it was found in, and the site of the offending record.
 *
 * @param vc    the record being written; its contig and start position are reported in the message
 * @param id    the key that is not defined in the header
 * @param field the kind of field the key was found in, as it should appear in the message
 */
private final void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) {
    // missing header lines are only tolerated when the writer was explicitly told to allow them
    if ( allowMissingFieldsInHeader )
        return;

    final String site = vc.getChr() + ":" + vc.getStart();
    final String message = "Key " + id + " found in VariantContext field " + field
            + " at " + site
            + " but this key isn't defined in the VCFHeader.  The GATK now requires all VCFs to have"
            + " complete VCF headers by default.  This error can be disabled with the engine argument"
            + " --allowMissingVCFHeaders";
    throw new UserException.MalformedVCFHeader(message);
}
}

View File

@ -79,7 +79,8 @@ public class VariantContextWriterFactory {
else {
return new VCFWriter(location, output, refDict,
options.contains(Options.INDEX_ON_THE_FLY),
options.contains(Options.DO_NOT_WRITE_GENOTYPES));
options.contains(Options.DO_NOT_WRITE_GENOTYPES),
options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER));
}
}

View File

@ -55,7 +55,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
public void unixFileTest() {
logger.warn("Executing unixFileTest");
fastaFile = new File(testDir + "exampleFASTA.fasta");
fastaFile = new File(publicTestDir + "exampleFASTA.fasta");
builder = new FastaSequenceIndexBuilder(fastaFile, false);
FastaSequenceIndex index = builder.createIndex();
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
@ -72,7 +72,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
public void windowsFileTest() {
logger.warn("Executing windowsFileTest");
fastaFile = new File(testDir + "exampleFASTA-windows.fasta");
fastaFile = new File(publicTestDir + "exampleFASTA-windows.fasta");
builder = new FastaSequenceIndexBuilder(fastaFile, false);
FastaSequenceIndex index = builder.createIndex();
controlIndex.add(new FastaSequenceIndexEntry("chr2", 7, 29, 7, 9,0));
@ -88,7 +88,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
public void combinedWindowsUnix() {
logger.warn("Executing combinedWindowsUnix");
fastaFile = new File(testDir + "exampleFASTA-combined.fasta");
fastaFile = new File(publicTestDir + "exampleFASTA-combined.fasta");
builder = new FastaSequenceIndexBuilder(fastaFile, false);
FastaSequenceIndex index = builder.createIndex();
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
@ -105,7 +105,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
public void threeVariableLengthContigs() {
logger.warn("Executing threeVariableLengthContigs");
fastaFile = new File(testDir + "exampleFASTA-3contigs.fasta");
fastaFile = new File(publicTestDir + "exampleFASTA-3contigs.fasta");
builder = new FastaSequenceIndexBuilder(fastaFile, false);
FastaSequenceIndex index = builder.createIndex();
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 17, 5, 6,0));

View File

@ -87,10 +87,13 @@ public abstract class BaseTest {
private static final String networkTempDir;
private static final File networkTempDirFile;
protected static final String testDirRelative = "public/testdata/";
public static final File testDirFile = new File(testDirRelative);
public static final String testDir = testDirFile.getAbsolutePath() + "/";
protected static final String testDirRoot = testDir.replace(testDirRelative, "");
private static final String privateTestDirRelative = "private/testdata/";
public static final String privateTestDir = new File(privateTestDirRelative).getAbsolutePath() + "/";
protected static final String privateTestDirRoot = privateTestDir.replace(privateTestDirRelative, "");
private static final String publicTestDirRelative = "public/testdata/";
public static final String publicTestDir = new File(publicTestDirRelative).getAbsolutePath() + "/";
protected static final String publicTestDirRoot = publicTestDir.replace(publicTestDirRelative, "");
public static final String keysDataLocation = validationDataLocation + "keys/";
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
@ -277,7 +280,7 @@ public abstract class BaseTest {
Reporter.log(message, true);
}
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-4;
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
Assert.assertTrue(actual instanceof Double);

View File

@ -48,7 +48,7 @@ public class MD5DB {
/**
* Subdirectory under the ant build directory where we store integration test md5 results
*/
private static final int MAX_RECORDS_TO_READ = 100000;
private static final int MAX_RECORDS_TO_READ = 1000000;
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = -1;
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";

View File

@ -51,7 +51,7 @@ import java.text.SimpleDateFormat;
import java.util.*;
public class WalkerTest extends BaseTest {
private static final boolean GENERATE_SHADOW_BCF = false;
private static final boolean GENERATE_SHADOW_BCF = true;
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
private static final boolean ENABLE_ON_THE_FLY_CHECK_FOR_VCF_INDEX = false;
@ -356,7 +356,7 @@ public class WalkerTest extends BaseTest {
System.out.println(String.format("[%s] Executing test %s with GATK arguments: %s", now, name, cmdline));
// also write the command line to the HTML log for convenient follow-up
// do the replaceAll so paths become relative to the current
BaseTest.log(cmdline.replaceAll(testDirRoot, ""));
BaseTest.log(cmdline.replaceAll(publicTestDirRoot, "").replaceAll(privateTestDirRoot, ""));
CommandLineExecutable.start(instance, command);
} catch (Exception e) {
gotAnException = true;

View File

@ -844,8 +844,8 @@ public class ParsingEngineUnitTest extends BaseTest {
Assert.assertEquals(argProvider.bindings.get(1).getName(), "foo2", "Name isn't set properly");
}
private final static String HISEQ_VCF = testDir + "HiSeq.10000.vcf";
private final static String TRANCHES_FILE = testDir + "tranches.6.txt";
private final static String HISEQ_VCF = privateTestDir + "HiSeq.10000.vcf";
private final static String TRANCHES_FILE = privateTestDir + "tranches.6.txt";
@Test
public void variantContextBindingTestDynamicTyping1() {

View File

@ -79,7 +79,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
*/
@Test
public void testSingleBinding() {
String fileName = testDir + "TabularDataTest.dat";
String fileName = privateTestDir + "TabularDataTest.dat";
RMDTriplet triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE,new Tags());
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
@ -101,7 +101,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
*/
@Test
public void testMultipleBinding() {
File file = new File(testDir + "TabularDataTest.dat");
File file = new File(privateTestDir + "TabularDataTest.dat");
RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath(),RMDStorageType.FILE,new Tags());
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(testTriplet1,builder,seq.getSequenceDictionary(),genomeLocParser,false);

View File

@ -94,13 +94,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexWordBoundary() {
GATKBAMIndex index = new GATKBAMIndex(new File(testDir + "truncated_at_word_boundary.bai"));
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"));
index.readReferenceSequence(0);
}
@Test( expectedExceptions = UserException.MalformedFile.class )
public void testDetectTruncatedBamIndexNonWordBoundary() {
GATKBAMIndex index = new GATKBAMIndex(new File(testDir + "truncated_at_non_word_boundary.bai"));
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"));
index.readReferenceSequence(0);
}

View File

@ -68,10 +68,10 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
@BeforeMethod
public void setUp() {
String fileName = testDir + "TabularDataTest.dat";
String fileName = privateTestDir + "TabularDataTest.dat";
// check to see if we have an index, if so, delete it
File indexFileName = new File(testDir + "TabularDataTest.dat.idx");
File indexFileName = new File(privateTestDir + "TabularDataTest.dat.idx");
if (indexFileName.exists()) indexFileName.delete();
triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE,new Tags());

View File

@ -43,7 +43,7 @@ public class ReferenceOrderedQueryDataPoolUnitTest extends BaseTest{
@Test
public void testCloseFilePointers() throws IOException {
// Build up query parameters
File file = new File(BaseTest.validationDataLocation + "NA12878.hg19.example1.vcf");
File file = new File(BaseTest.privateTestDir + "NA12878.hg19.example1.vcf");
RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags());
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
GenomeLocParser parser = new GenomeLocParser(seq);

View File

@ -4,7 +4,6 @@ import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -188,7 +187,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
}
List<String> filterList = new ArrayList<String>();
filterList.add(testDir + "readgroupblacklisttest.txt");
filterList.add(privateTestDir + "readgroupblacklisttest.txt");
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
int filtered = 0;
@ -227,7 +226,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
}
List<String> filterList = new ArrayList<String>();
filterList.add(testDir + "readgroupblacklisttestlist.txt");
filterList.add(privateTestDir + "readgroupblacklisttestlist.txt");
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
int filtered = 0;

View File

@ -52,11 +52,11 @@ import java.util.*;
* UnitTests for RMD FeatureManager
*/
public class FeatureManagerUnitTest extends BaseTest {
private static final File RANDOM_FILE = new File(testDir + "exampleGATKReport.eval");
private static final File VCF3_FILE = new File(testDir + "vcfexample3.vcf");
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
private static final File RANDOM_FILE = new File(publicTestDir+ "exampleGATKReport.eval");
private static final File VCF3_FILE = new File(privateTestDir + "vcf3.vcf");
private static final File VCF4_FILE = new File(privateTestDir + "HiSeq.10000.vcf");
private static final File VCF4_FILE_GZ = new File(privateTestDir + "HiSeq.10000.vcf.gz");
private static final File VCF4_FILE_BGZIP = new File(privateTestDir + "HiSeq.10000.bgzip.vcf.gz");
private FeatureManager manager;
private GenomeLocParser genomeLocParser;

View File

@ -44,7 +44,7 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
final String chr = "20";
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
GenomeLocParser parser = new GenomeLocParser(seq);
File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf");
File file = new File(privateTestDir + "NA12878.hg19.example1.vcf");
VCFCodec codec = new VCFCodec();
TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec);
CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query(chr, 1, 100000);

View File

@ -36,7 +36,7 @@ import java.io.PrintStream;
public class GATKReportUnitTest extends BaseTest {
@Test
public void testParse() throws Exception {
String reportPath = testDir + "exampleGATKReportv2.tbl";
String reportPath = publicTestDir + "exampleGATKReportv2.tbl";
GATKReport report = new GATKReport(reportPath);
Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_1);
Assert.assertEquals(report.getTables().size(), 5);

View File

@ -20,7 +20,7 @@ import java.util.*;
public class SampleDBUnitTest extends BaseTest {
private static SampleDBBuilder builder;
// all the test sample files are located here
private File testPED = new File(testDir + "ceutrio.ped");
private File testPED = new File(privateTestDir + "ceutrio.ped");
private static final Set<Sample> testPEDSamples = new HashSet<Sample>(Arrays.asList(
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),

View File

@ -10,7 +10,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
public static String baseTestString(String reference, String VCF) {
return "-T CombineVariants" +
" -R " + reference +
" --variant:vcf " + testDir + VCF +
" --variant:vcf " + privateTestDir + VCF +
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
" -genotypeMergeOptions REQUIRE_UNIQUE" +
" -setKey null" +
@ -19,7 +19,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
}
@Test
@Test(enabled = false)
public void test1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
@ -28,7 +28,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
executeTest("Test symbolic alleles", spec);
}
@Test
@Test(enabled = false)
public void test2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),

View File

@ -36,7 +36,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-R " + hg18Reference +
" -T ClipReads " +
"-I " + testDir + "clippingReadsTest.withRG.bam " +
"-I " + privateTestDir + "clippingReadsTest.withRG.bam " +
"-os %s " +
"-o %s " + args,
2, // two output files: one for -os and one for -o
@ -55,9 +55,9 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
// One-line tests: each forwards a label, a ClipReads argument string and two hex strings
// (presumably the expected MD5s of the two outputs -- confirm against testClipper) to testClipper.
@Test public void testClipRange2() { testClipper("clipRange2", "-CT 1-5,11-15", "be4fcad5b666a5540028b774169cbad7", "3061cf742f9e5526a61130128ae761a3"); }
@Test public void testClipSeq() { testClipper("clipSeqX", "-X CCCCC", "db199bd06561c9f2122f6ffb07941fbc", "b89459f373e40f0b835c1faff2208839"); }
// NOTE(review): testClipSeqFile and testClipMulti each appear twice below, once reading
// seqsToClip.fasta from testDir and once from privateTestDir. Presumably this listing interleaves
// the old and new revision of the same line; a class cannot declare both -- confirm which is kept.
@Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + testDir + "seqsToClip.fasta", "d011a3152b31822475afbe0281491f8d", "24e19116ef16a37a6d095ed5c22c2466"); }
@Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + privateTestDir + "seqsToClip.fasta", "d011a3152b31822475afbe0281491f8d", "24e19116ef16a37a6d095ed5c22c2466"); }
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + testDir + "seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "ad8d30300cb43d5e300fcc4d2450da8e"); }
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + privateTestDir + "seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "ad8d30300cb43d5e300fcc4d2450da8e"); }
// Both WRITE_NS and WRITE_Q0S variants share the Q10ClipOutput constant as their first expected value.
@Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "57c05b6241db7110148a91fde2d431d0"); }
@Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "2a1a3153e0942ab355fd8a6e082b30e0"); }
@ -68,7 +68,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + hg18Reference +
" -T ClipReads" +
" -I " + testDir + "originalQuals.chr1.1-1K.bam" +
" -I " + privateTestDir + "originalQuals.chr1.1-1K.bam" +
" -L chr1:1-1,000" +
" -OQ -QT 4 -CR WRITE_Q0S" +
" -o %s -os %s",

View File

@ -47,7 +47,7 @@ public class PrintReadsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T PrintReads" +
" -R " + params.reference +
" -I " + testDir + params.bam +
" -I " + privateTestDir + params.bam +
params.args +
" -o %s",
Arrays.asList(params.md5));

View File

@ -15,40 +15,40 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
// Runs baseTestString() against vcfexample2.vcf and the low_coverage_CEU BAM without requesting
// any annotation group, then passes the spec to executeTest.
@Test
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably this listing interleaves the old and new
// revision of the same lines; only one pair can belong to the call -- confirm.
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("55785745fe13ad81a2c4a14373d091f0"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
// Same shape as the #1 variant but on vcfexample3.vcf with the NA12878 SLX BAM; no annotation
// group is requested on the command line.
@Test
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("9914bd19f6235c550e5182e0f4591da6"));
baseTestString() + " --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("d6f749f8dbeb2d42c9effaff9fe571d7"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
// Adds "-G Standard" to the base command and runs it on vcfexample2.vcf with the
// low_coverage_CEU BAM.
@Test
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6a52ef10bb10d72cdd82a8f7afc2dd09"));
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9084e6c7b1cec0f3a2c6d96711844d5e"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
// Adds "-G Standard" to the base command and runs it on vcfexample3.vcf with the NA12878 SLX BAM.
@Test
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("74d894fd31b449deffca88d0e465f01b"));
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("3dfabdcaa2648ac34380fb71860c42d3"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
// Runs the base command on vcfexample2empty.vcf with the low_coverage_CEU BAM and no
// annotation group requested.
@Test
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("dd89dfa22f0e1d6760095e04f528d62a"));
baseTestString() + " --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b85c1ea28194484b327fbe0add1b5685"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -57,96 +57,96 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
// the genotype annotations in this file are actually out of order. If you don't parse the genotypes
// they don't get reordered. It's a good test of the genotype ordering system.
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("542d9ed8290ef7868387af4127e0b5fa"));
baseTestString() + " --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("fe4d4e2484c4cf8b1cd50ad42cfe468e"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
// Adds "-G Standard" to the base command and runs it on vcfexample2empty.vcf with the
// low_coverage_CEU BAM.
@Test
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b1b32ed3b831c92c94258c8e4a60e8c9"));
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("043fc6205b0633edcd3fadc9e044800c"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
// Adds "-G Standard" to the base command and runs it on vcfexample3empty.vcf with the
// NA12878 SLX BAM.
@Test
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("a25eacb0ceea2c082af349f8d7776c8a"));
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("6fafb42d374a67ba4687a23078a126af"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
// Requests "-G Standard" while excluding FisherStrand and ReadPosRankSumTest via -XA, on
// vcfexample2empty.vcf with the low_coverage_CEU BAM.
@Test
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("ef046909a6f6c6cb43653a255a99a014"));
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("639462a0e0fa79e33def5f011fe55961"));
executeTest("test exclude annotations", spec);
}
// Runs "-G Standard" on vcfexample4.vcf with the NA12878 SLX BAM at the single position
// 1:10,001,292.
@Test
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseTestString() + " -G Standard --variant " + testDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("5c2fded3b6a96b0b0788086bbb2409ed"));
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("ebbf32f5b8b8d22f2eb247a0a3db3da0"));
executeTest("test overwriting header", spec);
}
// Runs "-G Standard" with no -I argument; vcfexample3empty.vcf is passed both as --variant and
// as the -L interval source.
@Test
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("c590088d85edce786604fd600f5d5e75"));
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("afe6c9d3b4b80635a541cdfcfa48db2f"));
executeTest("not passing it any reads", spec);
}
// Supplies b36dbSNP129 through --dbsnp together with "-G Standard"; vcfexample3empty.vcf is
// both the --variant input and the -L interval source.
@Test
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("ade9354a4cdd6cc92c169f252fb36f3f"));
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("21d696ea8c55d2fd4cbb4dcd5f7f7db6"));
executeTest("getting DB tag with dbSNP", spec);
}
// Passes --alwaysAppendDbsnpId with --dbsnp b36dbSNP129 and "-G Standard";
// vcfexample3withIDs.vcf is both the --variant input and the -L interval source.
@Test
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3withIDs.vcf -L " + testDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("f496f40e1e9efa743e3b473f6fe6e6d3"));
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("ef95394c14d5c16682a322f3dfb9000c"));
executeTest("adding multiple IDs with dbSNP", spec);
}
// Binds fakeHM3.vcf as --comp:H3 alongside "-G Standard"; vcfexample3empty.vcf is both the
// --variant input and the -L interval source.
@Test
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseTestString() + " --comp:H3 " + testDir + "fakeHM3.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("d383fbd741d604625c9507d4da1c5a27"));
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("e6e276b7d517d57626c8409589cd286f"));
executeTest("getting DB tag with HM3", spec);
}
// Requests the single annotation "-A QualByDepth" on noQual.vcf with the NA12878 SLX BAM;
// noQual.vcf also serves as the -L interval source.
@Test
public void testNoQuals() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseTestString() + " --variant " + testDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + testDir + "noQual.vcf -A QualByDepth", 1,
Arrays.asList("4a247f039dfb16ac05b38a0dd5f98da6"));
baseTestString() + " --variant " + privateTestDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + privateTestDir + "noQual.vcf -A QualByDepth", 1,
Arrays.asList("a99e8315571ed1b6bce942451b3d8612"));
executeTest("test file doesn't have QUALs", spec);
}
// Binds targetAnnotations.vcf as --resource:foo and requests the expression "-E foo.AF" with
// "-G Standard" on vcfexample3empty.vcf.
@Test
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("067792efcffea93ade632e52a80d0d8f"));
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("7d6ea3b54210620cbc7e14dad8836bcb"));
executeTest("using expression", spec);
}
// Binds targetAnnotations.vcf as --resource:foo and requests the expression "-E foo.ID" with
// "-G Standard" on vcfexample3empty.vcf.
@Test
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("66c68deb0508348324eb47d524e756de"));
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("35ce4fb0288dfc5c01ec6ce8b14c6157"));
executeTest("using expression with ID", spec);
}
@ -189,8 +189,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
// Runs VariantAnnotator with "-A TransmissionDisequilibriumTest" on the family VCF, using the
// same VCF as the -L source and ug.random50000.family.ped as the -ped file.
public void testTDTAnnotation() {
final String MD5 = "81f85f0ce8cc36df7c717c478e100ba1";
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): the two-line command string appears twice below, once built from testDir and
// once from privateTestDir, with a single shared MD5. Presumably old and new revisions of the
// same lines shown together; only one copy can belong to the call -- confirm.
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,
Arrays.asList(MD5));
executeTest("Testing TDT annotation ", spec);
}
@ -200,8 +200,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
// Runs VariantAnnotator with "-A ChromosomeCounts" on the family VCF, using the same VCF as the
// -L source and ug.random50000.family.ped as the -ped file.
public void testChromosomeCountsPed() {
final String MD5 = "9830fe2247651377e68ad0b0894e9a4e";
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): the two-line command string appears twice below, once built from testDir and
// once from privateTestDir, with a single shared MD5. Presumably old and new revisions of the
// same lines shown together; only one copy can belong to the call -- confirm.
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,
Arrays.asList(MD5));
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
}
@ -210,8 +210,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
// Runs VariantAnnotator with "-A InbreedingCoeff" on the family VCF, using the same VCF as the
// -L source and ug.random50000.family.ped as the -ped file.
public void testInbreedingCoeffPed() {
final String MD5 = "e94d589b5691e3ecfd9cc9475a384890";
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): the two-line command string appears twice below, once built from testDir and
// once from privateTestDir, with a single shared MD5. Presumably old and new revisions of the
// same lines shown together; only one copy can belong to the call -- confirm.
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,
Arrays.asList(MD5));
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
}

View File

@ -32,7 +32,7 @@ import java.util.Arrays;
public class BeagleIntegrationTest extends WalkerTest {
private static final String beagleValidationDataLocation = testDir + "/Beagle/";
private static final String beagleValidationDataLocation = privateTestDir + "/Beagle/";
@Test
public void testBeagleOutput() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -41,7 +41,8 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
"-o %s --no_cmdline_in_header", 1, Arrays.asList("cdbf8cc557f5be9ac778e52338c0d906"));
"-o %s --no_cmdline_in_header --allowMissingVCFHeaders", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea"));
spec.disableShadowBCF();
executeTest("test BeagleOutputToVCF", spec);
}
@ -50,7 +51,8 @@ public class BeagleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ProduceBeagleInput -R " + hg19Reference + " " +
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
"-o %s", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
"-o %s --allowMissingVCFHeaders", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
spec.disableShadowBCF();
executeTest("test BeagleInput", spec);
}
@ -59,8 +61,9 @@ public class BeagleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
"--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 --allowMissingVCFHeaders -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","4b6417f892ccfe5c63b8a60cb0ef3740"));
spec.disableShadowBCF();
executeTest("test BeagleInputWithBootstrap",spec);
}
@ -72,8 +75,8 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("8c05bda0630155bcd0ebaf155ed5e491"));
"-L 20:1-70000 -o %s --no_cmdline_in_header --allowMissingVCFHeaders",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
spec.disableShadowBCF();
executeTest("testBeagleChangesSitesToRef",spec);
}

View File

@ -52,10 +52,10 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
// TestNG data provider named "data": constructs TestParams cases (VCF pair and BAM pair, each
// with a true and a false flag) and returns TestParams.getTests(TestParams.class).
// The constructed objects are not stored here; presumably the constructor registers them so
// getTests can collect them -- confirm against TestParams.
@DataProvider(name = "data")
public Object[][] createData() {
// NOTE(review): eight cases appear below -- four built from testDir, then four built from
// privateTestDir/publicTestDir with different MD5s for the VCF pair. Presumably this listing
// interleaves the old and new revisions; confirm that only one set of four is meant to exist.
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", true, "bf7ef17436a7eccf27be41a9477904f6");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", false, "8ab29169cff232e670db9a4c54fc4358");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", true, "aea3d5df32a2acd400da48d06b4dbc60");
new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", false, "e71e23e7ebfbe768e59527bc62f8918d");
new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
return TestParams.getTests(TestParams.class);
}

View File

@ -29,8 +29,8 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
// the imports for unit testing.
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -45,8 +45,8 @@ import java.util.*;
public class DiffableReaderUnitTest extends BaseTest {
DiffEngine engine;
File vcfFile = new File(testDir + "diffTestMaster.vcf");
File bamFile = new File(testDir + "exampleBAM.bam");
File vcfFile = new File(privateTestDir + "diffTestMaster.vcf");
File bamFile = new File(publicTestDir + "exampleBAM.bam");
@BeforeClass(enabled = true)
public void createDiffEngine() {
@ -97,7 +97,7 @@ public class DiffableReaderUnitTest extends BaseTest {
testLeaf(rec1, "REF", Allele.create("G", true));
testLeaf(rec1, "ALT", Arrays.asList(Allele.create("A")));
testLeaf(rec1, "QUAL", 0.15);
testLeaf(rec1, "FILTER", Collections.<Object>emptySet());
testLeaf(rec1, "FILTER", VCFConstants.PASSES_FILTERS_v4);
testLeaf(rec1, "AC", "2");
testLeaf(rec1, "AF", "1.00");
testLeaf(rec1, "AN", "2");

View File

@ -15,88 +15,88 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
// Runs the base command on vcfexample2.vcf over 1:10,020,000-10,021,000 with no filter or mask
// arguments added.
@Test
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fbf88e25df30181ca5422a374c7b36fa"));
executeTest("test no action", spec);
}
// Adds "-window 10" to the base command and runs it on vcfexample2.vcf.
@Test
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -window 10 --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4a4596929f9fe983d8868ca142567781"));
baseTestString() + " -window 10 --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("bb69f49e9ef0054f0ccd6d38f5ffa46a"));
executeTest("test clustered SNPs", spec);
}
// Uses vcfexample2.vcf as both the mask (named "foo" via -maskName) and the variant input.
@Test
public void testMask1() {
WalkerTestSpec spec1 = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back. The first binds both inputs with
// the :VCF3 tag from testDir; the second drops the tag on --mask and --variant and reads from
// privateTestDir. Presumably old and new revisions shown together -- confirm which is kept.
baseTestString() + " -maskName foo --mask:VCF3 " + testDir + "vcfexample2.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("1719462cd17986c33e59e45b69df0270"));
baseTestString() + " -maskName foo --mask " + privateTestDir + "vcfexample2.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("7e3225a32fcd6066901247992b2c5ca8"));
executeTest("test mask all", spec1);
}
// Uses vcfMask.vcf (bound as --mask:VCF, named "foo") to mask vcfexample2.vcf.
@Test
public void testMask2() {
WalkerTestSpec spec2 = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant; --mask:VCF is kept in both). Presumably old and new
// revisions of the same lines shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("db19ff7d90c82cda09fb3c3878100eb5"));
baseTestString() + " -maskName foo --mask:VCF " + privateTestDir + "vcfMask.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("3485fe95e3f0864c3575baf05cef4bcc"));
executeTest("test mask some", spec2);
}
// Same inputs as the "mask some" case plus "-maskExtend 10".
@Test
public void testMask3() {
WalkerTestSpec spec3 = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant; --mask:VCF is kept in both). Presumably old and new
// revisions of the same lines shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a9e417cba21585c786d4b9930265ea31"));
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + privateTestDir + "vcfMask.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("367ab9c028a68e4eda2055e3bb8b486c"));
executeTest("test mask extend", spec3);
}
// Applies the filter expression 'DoC < 20 || FisherStrand > 20.0' under the name "foo" to
// vcfexample2.vcf.
@Test
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4160904b180d1f62a6bf50de6728ce00"));
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("5a10d969e50a58d8dfbf1da54bf293df"));
executeTest("test filter #1", spec);
}
// Applies the filter expression 'AlleleBalance < 70.0 && FisherStrand == 1.4' under the name
// "bar" to vcfexample2.vcf.
@Test
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("df80db30c7836731ac7c8c3d4fc005b4"));
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("886dbbca2350083819ff67224f6efbd6"));
executeTest("test filter #2", spec);
}
// Supplies two named filters on one command line: ABF ('AlleleBalance < 0.7') and
// FSF ('FisherStrand == 1.4'), applied to vcfexample2.vcf.
@Test
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("71ce6c0952831cb68f575aa0173dce2b"));
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("ee78c2e7128a8f9549233493c7cf6949"));
executeTest("test filter with separate names #2", spec);
}
// Applies the genotype-level filter 'GQ == 0.60' (-G_filter / -G_filterName foo) to
// vcfexample2.vcf.
@Test
public void testGenotypeFilters1() {
WalkerTestSpec spec1 = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back (testDir with --variant:VCF3, then
// privateTestDir with plain --variant). Presumably old and new revisions of the same lines
// shown together; only one pair can belong to the call -- confirm.
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("179f7f2a90c0e6c656109aac9b775476"));
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("285dd348c47c8c1e85d2886f9b33559e"));
executeTest("test genotype filter #1", spec1);
}
// Applies a genotype-level filter named "foo" (-G_filter / -G_filterName) to vcfexample2.vcf.
@Test
public void testGenotypeFilters2() {
WalkerTestSpec spec2 = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, and they differ in more than the
// path: the first filters on 'AF == 0.04 && isHomVar == 1' (testDir, --variant:VCF3), the
// second only on 'isHomVar == 1' (privateTestDir, --variant). Presumably old and new revisions
// shown together -- confirm which expression the test is meant to exercise.
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("22e07c27feb9017a130dfb045c5b29b9"));
baseTestString() + " -G_filter 'isHomVar == 1' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a9c835a13eb72aa22d5e271894d8ac33"));
executeTest("test genotype filter #2", spec2);
}
// Applies the filter expression 'QUAL < 100' under the name "foo" to twoDeletions.vcf; no -L
// interval is given.
@Test
public void testDeletions() {
WalkerTestSpec spec = new WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5 (--variant:VCF is kept in both). Presumably old and new
// revisions of the same lines shown together; only one pair can belong to the call -- confirm.
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + testDir + "twoDeletions.vcf", 1,
Arrays.asList("637256ee5348c1c57f1dadf581b06ed9"));
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + privateTestDir + "twoDeletions.vcf", 1,
Arrays.asList("a1c02a5a90f1262e9eb3d2cad1fd08f2"));
executeTest("test deletions", spec);
}
}

View File

@ -28,23 +28,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Runs baseCommand on the low_coverage_CEU BAM over 1:10,022,000-10,025,000 with one "-o %s"
// output.
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
// NOTE(review): two MD5 lists follow for a single command line. Presumably this listing shows
// the old and new expected value together; only one can close the call -- confirm.
Arrays.asList("1c6ea045819b151bcd9d98947c5d4c4d"));
Arrays.asList("f98c38defc8d619609399b4a3ba874e8"));
executeTest("test MultiSample Pilot1", spec);
}
// Runs baseCommand in GENOTYPE_GIVEN_ALLELES mode with allelesForUG.vcf as -alleles on the
// pilot2_daughters BAM.
@Test
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("c09dfbfc5b76acacb616730eaa83a150"));
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("6f0c49b76225e2099c74015b6f79c96d"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
// Same as the given-alleles case above in this class but with "--output_mode EMIT_ALL_SITES"
// added to the command line.
@Test
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("c51d037e0b1cd0ed3a1cd6c6b29646cf"));
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("42c4e37e0f130dc796231003638a197c"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -52,23 +52,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Runs baseCommand on the NA12878 SLX BAM over 1:10,000,000-10,100,000 with one "-o %s" output.
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
// NOTE(review): two MD5 lists follow for a single command line. Presumably this listing shows
// the old and new expected value together; only one can close the call -- confirm.
Arrays.asList("0a085eac119c91d63fdd4a7e9a5e45af"));
Arrays.asList("736607ee529b5624a3ab5521ab9e1b35"));
executeTest("test SingleSample Pilot2", spec);
}
// Runs UnifiedGenotyper with "-glm BOTH" and --dbsnp b37dbSNP129 on multiallelic.snps.bam,
// taking intervals from multiallelic.snps.intervals.
@Test
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1,
Arrays.asList("bdbb67743c9f75ac60d0a10f94856361"));
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("f33507add5d5c30448948906467dd3f3"));
executeTest("test Multiple SNP alleles", spec);
}
// Runs UnifiedGenotyper with "-glm BOTH" on badRead.test.bam over 1:22753424-22753464.
@Test
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
// NOTE(review): two argument/MD5 pairs follow back to back, differing in testDir vs
// privateTestDir and in the MD5. Presumably old and new revisions of the same lines shown
// together; only one pair can belong to the call -- confirm.
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("bf60763a6e9c9d3987cfbac43b941a48"));
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("d915535c1458733f09f82670092fcab6"));
executeTest("test bad read", spec);
}
@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("1e991a6a7288be7ac603ef6467fb1ac2"));
Arrays.asList("0d724551e00129730b95fd4d70faaa58"));
executeTest("test reverse trim", spec);
}
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "3136826ec99366b0285b278aba35cec1";
private final static String COMPRESSED_OUTPUT_MD5 = "fe3429b736c50bb770e40c0320d498ed";
@Test
public void testCompressedOutput() {
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "7824468b8290ffb7795a1ec3e493c1a4";
String md5 = "306943dd63111e2e64388cd2e2de6c01";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -139,7 +139,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("86121f5094f26c8b2e320c1f5dea4ae3"));
Arrays.asList("b341b87742848a3224115fe94e73f244"));
executeTest("test min_base_quality_score 26", spec);
}
@ -147,7 +147,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("fe4a96f0049edd466c030def4c62a224"));
Arrays.asList("acb5332a267927d78edd51d93685111c"));
executeTest("test SLOD", spec);
}
@ -155,7 +155,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("ca8d0d91fd0cef93d4a606dec84a7986"));
Arrays.asList("74779b59730962bdf36a7a8ef84ac24d"));
executeTest("test NDA", spec);
}
@ -163,23 +163,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("7c3518d356c05c6b9a8918357c260bfe"));
Arrays.asList("036edf58a4ed6c626f53bd2ab34b9f97"));
executeTest("test using comp track", spec);
}
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "fe204cef499e5aceb2732ba2e45903ad");
testOutputParameters("-sites_only", "52b8336f347d182c158e8384b78f5a6d");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1ab8b68891d1531923a40d594250e8e0");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "281363e6afb3260143bfdb22710e3d0e");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "ab179ef6ece3ab9e6b1ff5800cb89ebd");
testOutputParameters("--output_mode EMIT_ALL_SITES", "a802b672850b6fbc2764611d3ad071d9");
}
private void testOutputParameters(final String args, final String md5) {
@ -193,7 +193,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("a19c6195211b0ff036c746c7e11490ed"));
Arrays.asList("99ef7ba1747c7289ce1f963130539e18"));
executeTest("test confidence 1", spec1);
}
@ -201,7 +201,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("3fc3c36edaac133b4c11b20a5af915c4"));
Arrays.asList("99ef7ba1747c7289ce1f963130539e18"));
executeTest("test confidence 2", spec2);
}
@ -212,12 +212,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "82caf6c25d3aeabf7978016474e04fd0" );
testHeterozosity( 0.01, "7e1681b9052e357ca4a065fa76c8afb6" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "d2a7ba1fa2d1a4153f685f3b3f6d55a2" );
testHeterozosity( 1.0 / 1850, "68a12f3eccac6cf4b27b6424f23628ee" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -241,7 +241,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("b574087efc5b259f69c429f1f415da0a"));
Arrays.asList("b098a7744a448cf91a50886e4cc7d268"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -260,7 +260,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("1b9556725b6a2cb52ad6745e9eca37e6"));
Arrays.asList("2f008169b82d542ec9cc94908c395a0f"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -279,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("9388a1216957c4722fe54af06a05f242"));
Arrays.asList("0d4177d7f963f4b4e8568613e7a468f0"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("8f942000baaf522fcea29691fe5ef75d"));
Arrays.asList("1268bde77842e6bb6a4f337c1d589f4d"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -307,7 +307,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("4b8822ccc9ac04bee37bf0c9922108f9"));
Arrays.asList("181c4ed8dd084b83f8de92123bb85c41"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -315,9 +315,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testWithIndelAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("13160041d8ebfb2080981f89e39eeb4f"));
Arrays.asList("5250cefb1fff262a6a3985dee29c154d"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -325,9 +325,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithIndelAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("59e874d76e42eafd98ad961eb70706bc"));
Arrays.asList("c9b468fe75e7215a6d6d5a050af07918"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -335,13 +335,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("eaef9cc984a95b5ccb4c4c1f7c20c235"));
Arrays.asList("01fd223deb4f88fb7d9ee9736b664d8a"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("b4df2bf0d820c6fc11fabcafe18bb769"));
Arrays.asList("c48c8a1a8ec88c6f3c99187e08496ae0"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -349,9 +349,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testGGAwithNoEvidenceInReads() {
final String vcf = "small.indel.test.vcf";
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation +
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
Arrays.asList("95226301a014347efc90e5f750a0db60"));
Arrays.asList("db0f91abb901e097714d8755058e1319"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
}
@ -384,7 +384,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("a3ea0eea74f2031ebb2ea0edfa14c945"));
Arrays.asList("25465c6dd3c4845f61b0f8e383388824"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -392,7 +392,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("a3741b9de95e5858640220d62a0d318c"));
Arrays.asList("aa58dc9f77132c30363562bcdc321f6e"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -400,7 +400,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("c1911f6ede7b4e8e83209ead66329596"));
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
executeTest("test minIndelFraction 1.0", spec);
}
}

View File

@ -44,7 +44,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
@Test
public void testKnownsOnly() {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + testDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L " + testDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -o %s",
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -o %s",
1,
Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96"));
executeTest("test rods only", spec3);

View File

@ -6,7 +6,7 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
private static String phaseByTransmissionTestDataRoot = testDir + "PhaseByTransmission/";
private static String phaseByTransmissionTestDataRoot = privateTestDir + "PhaseByTransmission/";
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";

View File

@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
return "-T ReadBackedPhasing" +
" -R " + reference +
" -I " + validationDataLocation + reads +
" --variant " + validationDataLocation + VCF +
" --variant " + ( VCF.contains("phasing_test") ? privateTestDir : validationDataLocation) + VCF +
" --cacheWindowSize " + cacheWindowSize +
" --maxPhaseSites " + maxPhaseSites +
" --phaseQualityThresh " + phaseQualityThresh +
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:332341-382503",
1,
Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba"));
Arrays.asList("442c819569417c1b7d6be9f41ce05394"));
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
}
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:1232503-1332503",
1,
Arrays.asList("f7517896c899a872c24d8e823ac9deae"));
Arrays.asList("2a51ee7d3c024f2410dcee40c5412993"));
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
}
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
+ " -L chr20:332341-382503",
1,
Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c"));
Arrays.asList("85bc9b03e24159f746dbd0cb988f9ec8"));
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
}
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
+ " -L chr20:332341-382503",
1,
Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59"));
Arrays.asList("96bb413a83c777ebbe622438e4565e8f"));
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
}
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
+ " -L chr20:332341-482503",
1,
Arrays.asList("6163a1fba27532da77765a7a11c55332"));
Arrays.asList("7d2402f055d243e2208db9ea47973e13"));
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
}
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:652810-681757",
1,
Arrays.asList("61a7d05f9eb4317cf0e6937d72e1e7ec"));
Arrays.asList("72682b3f27c33580d2d4515653ba6de7"));
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
}

View File

@ -271,7 +271,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" -knownSites:anyNameABCD,VCF3 " + testDir + "vcfexample3.vcf" +
" -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf" +
" -T CountCovariates" +
" -I " + bam +
" -knownSites " + b36dbSNP129 +

View File

@ -17,8 +17,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
@Test(enabled=true)
public void testWikiExample() {
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
String maskVCF = testDir + "amplicon_mask_sites.vcf";
String intervalTable = testDir + "amplicon_interval_table1.table";
String maskVCF = privateTestDir + "amplicon_mask_sites.vcf";
String intervalTable = privateTestDir + "amplicon_interval_table1.table";
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30";
@ -29,9 +29,9 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
@Test(enabled=true)
public void testWikiExampleNoBWA() {
String siteVCF = testDir + "sites_to_validate.vcf";
String maskVCF = testDir + "amplicon_mask_sites.vcf";
String intervalTable = testDir + "amplicon_interval_table1.table";
String siteVCF = privateTestDir + "sites_to_validate.vcf";
String maskVCF = privateTestDir + "amplicon_mask_sites.vcf";
String intervalTable = privateTestDir + "amplicon_interval_table1.table";
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
@ -42,9 +42,9 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
@Test(enabled=true)
public void testWikiExampleMonoFilter() {
String siteVCF = testDir + "sites_to_validate.vcf";
String maskVCF = testDir + "amplicon_mask_sites.vcf";
String intervalTable = testDir + "amplicon_interval_table1.table";
String siteVCF = privateTestDir + "sites_to_validate.vcf";
String maskVCF = privateTestDir + "amplicon_mask_sites.vcf";
String intervalTable = privateTestDir + "amplicon_interval_table1.table";
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
testArgs += " --virtualPrimerSize 30 --filterMonomorphic";

View File

@ -303,7 +303,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
String tests = cmdRoot +
" --dbsnp " + b36dbSNP129 +
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes " + testDir + "yri.trio.gatk.ug.head.vcf";
" --comp:comp_genotypes " + privateTestDir + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c"));
executeTestParallel("testSelect1", spec);
@ -343,7 +343,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test(enabled = false) // no longer supported in the GATK
public void testTranches() {
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + privateTestDir + "tranches.6.txt";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
executeTestParallel("testTranches",spec);
}
@ -530,11 +530,11 @@ public class VariantEvalIntegrationTest extends WalkerTest {
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"-eval " + testDir + "/withSymbolic.b37.vcf",
"-eval " + privateTestDir + "/withSymbolic.b37.vcf",
"-noEV",
"-EV CountVariants",
"-noST",
"-stratIntervals " + testDir + "/overlapTest.bed",
"-stratIntervals " + privateTestDir + "/overlapTest.bed",
"-ST IntervalStratification",
"-L 20",
"-o %s"
@ -602,7 +602,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"-eval " + testDir + "/ac0.vcf",
"-eval " + privateTestDir + "/ac0.vcf",
"-L 20:81006 -noST -noEV -EV VariantSummary -o %s" + (includeAC0 ? " -keepAC0" : "")
),
1,

View File

@ -45,10 +45,10 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
private static int N_VARIANTS = 100;
VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS];
private final File QUAL_DATA = new File(testDir + "tranches.raw.dat");
private final File QUAL_DATA = new File(privateTestDir + "tranches.raw.dat");
private final double[] TRUTH_SENSITIVITY_CUTS = new double[]{99.9, 99.0, 97.0, 95.0};
private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt");
private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt");
private final File EXPECTED_TRANCHES_NEW = new File(privateTestDir + "tranches.6.txt");
private final File EXPECTED_TRANCHES_OLD = new File(privateTestDir + "tranches.4.txt");
private ArrayList<VariantDatum> readData() {
ArrayList<VariantDatum> vd = new ArrayList<VariantDatum>();

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.MD5DB;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
@ -27,7 +26,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
"6e1f98bb819ccf03e17a2288742160d3", // recal file
"b9709e4180e56abc691b208bd3e8626c", // recal file
"c58ff4140e8914f0b656ed625c7f73b9"); // cut VCF
@DataProvider(name = "VRTest")
@ -75,7 +74,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
"8e2417336fa62e6c4d9f61b6deebdd82", // recal file
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"05e88052e0798f1c1e83f0a8938bce56"); // cut VCF
@DataProvider(name = "VRIndelTest")
@ -129,11 +128,11 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -L 20:1000100-1000500" +
" -mode BOTH" +
" --no_cmdline_in_header" +
" -input " + testDir + "VQSR.mixedTest.input" +
" -input " + privateTestDir + "VQSR.mixedTest.input" +
" -o %s" +
" -tranchesFile " + testDir + "VQSR.mixedTest.tranches" +
" -recalFile " + testDir + "VQSR.mixedTest.recal",
Arrays.asList("1370d7701a6231633d43a8062b7aff7f"));
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
Arrays.asList("d670c684f73e2744b6c01738a01d5ec4"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
}

View File

@ -34,8 +34,23 @@ import java.util.Arrays;
* Tests CombineVariants
*/
public class CombineVariantsIntegrationTest extends WalkerTest {
public static String baseTestString(String args) {
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
//
// TODO TODO TODO TODO TODO TODO TODO TODO
// TODO TODO TODO TODO TODO TODO TODO TODO
//
// TODO WHEN THE HC EMITS VALID VCF HEADERS ENABLE BCF AND REMOVE allowMissingVCFHeaders ARGUMENTS
//
// TODO TODO TODO TODO TODO TODO TODO TODO
// TODO TODO TODO TODO TODO TODO TODO TODO
// TODO TODO TODO TODO TODO TODO TODO TODO
//
/**
 * Builds the shared CombineVariants command line used by the tests in this class.
 *
 * The returned string fixes the walker (-T CombineVariants), suppresses the command
 * line in the output header, restricts processing to 1:1-50,000,000, leaves a %s
 * placeholder for the output file, passes --allowMissingVCFHeaders, and selects
 * b36KGReference as the reference.
 *
 * @param args extra arguments appended verbatim after the reference; no separator is
 *             inserted, so callers supply their own leading space
 * @return the base command line followed by {@code args}
 */
private static String baseTestString(String args) {
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s --allowMissingVCFHeaders -R " + b36KGReference + args;
}
/**
 * Runs a CombineVariants test: calls disableShadowBCF() on the spec, then hands it to
 * executeTest. Whatever executeTest returns is discarded.
 *
 * @param name label passed through to executeTest
 * @param spec the test specification; it is modified in place by disableShadowBCF()
 *             before being executed
 */
private void cvExecuteTest(final String name, final WalkerTestSpec spec) {
// Must happen before executeTest so the setting is in effect for this run.
spec.disableShadowBCF();
executeTest(name, spec);
}
public void test1InOut(String file, String md5) {
@ -47,7 +62,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args),
1,
Arrays.asList(md5));
executeTest("testInOut1--" + file, spec);
cvExecuteTest("testInOut1--" + file, spec);
}
public void combine2(String file1, String file2, String args, String md5) {
@ -55,7 +70,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args),
1,
Arrays.asList(md5));
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
}
public void combineSites(String args, String md5) {
@ -67,15 +82,15 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
+ " -V:hm3 " + validationDataLocation + file2 + args,
1,
Arrays.asList(md5));
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
}
public void combinePLs(String file1, String file2, String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 " + validationDataLocation + file2,
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + privateTestDir + file1 + " -V:v2 " + privateTestDir + file2,
1,
Arrays.asList(md5));
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); }
@ -86,7 +101,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "284083f60792c5f817899445dfa63a42"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
@ -110,8 +125,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("c0d4d601aa5d2b29927c535868448d2a"));
executeTest("threeWayWithRefs", spec);
Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019"));
cvExecuteTest("threeWayWithRefs", spec);
}
// complex examples with filtering, indels, and multiple alleles
@ -120,17 +135,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
String file2 = "combine.2.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b37KGReference
+ " -V:one " + validationDataLocation + file1
+ " -V:two " + validationDataLocation + file2 + args,
+ " -V:one " + privateTestDir + file1
+ " -V:two " + privateTestDir + file2 + args,
1,
Arrays.asList(md5));
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
}
@Test public void complexTestFull() { combineComplexSites("", "7d587bf49bbc9f8239476bab84bf9708"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "4d1e0c12d95f50e472493fc14af3cc06"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "9a98b01b9b2a28ae6af3125edc131dea"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "9a98b01b9b2a28ae6af3125edc131dea"); }
@Test public void complexTestFull() { combineComplexSites("", "8b19b54516b59de40992f0c4b328258a"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "a38dd097adc37420fe36ef8be14cfded"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "a3957dac9a617f50ce2668607e3baef0"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "a3957dac9a617f50ce2668607e3baef0"); }
@Test
public void combineDBSNPDuplicateSites() {
@ -138,6 +153,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
"-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
1,
Arrays.asList("3d2a5a43db86e3f6217ed2a63251285b"));
executeTest("combineDBSNPDuplicateSites:", spec);
cvExecuteTest("combineDBSNPDuplicateSites:", spec);
}
}

View File

@ -38,7 +38,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest {
@Test
public void testLeftAlignment() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + testDir + "forLeftAlignVariantsTest.vcf --no_cmdline_in_header",
"-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "forLeftAlignVariantsTest.vcf --no_cmdline_in_header",
1,
Arrays.asList("bcf05f56adbb32a47b6d6b27b327d5c2"));
executeTest("test left alignment", spec);

View File

@ -38,27 +38,27 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
@Test
public void testb36Tohg19() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + privateTestDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
Arrays.asList("a139480c004859452d4095fe4859b42e"));
executeTest("test b36 to hg19", spec);
}
@Test
public void testb36Tohg19UnsortedSamples() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + privateTestDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010"));
Arrays.asList("91344768f1e98c979364ec0d5d3aa9d6"));
executeTest("test b36 to hg19, unsorted samples", spec);
}
@Test
public void testhg18Tohg19Unsorted() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + testDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
"-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + privateTestDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b"));
Arrays.asList("e0b813ff873185ab51995a151f80ec98"));
executeTest("test hg18 to hg19, unsorted", spec);
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.util.Arrays;
@ -12,12 +13,14 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testDiscordanceNoSampleSpecified() {
String testFile = testDir + "NA12878.hg19.example1.vcf";
String testFile = privateTestDir + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant "
+ b37hapmapGenotypes + " -disc " + testFile
// The missing-header override is a boolean flag: pass it exactly once, as testDiscordance and testConcordance do.
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders",
1,
Arrays.asList("133fd0ded0bb213097cbe68995afbb7e")
Arrays.asList("d88bdae45ae0e74e8d8fd196627e612c")
);
spec.disableShadowBCF();
@ -26,12 +29,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testRepeatedLineSelection() {
String testfile = testDir + "test.dup.vcf";
String testfile = privateTestDir + "test.dup.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
1,
Arrays.asList("b2ee12588ebda200727762a903b8c972")
Arrays.asList("337bb7fc23153cf67acc42a466834775")
);
executeTest("testRepeatedLineSelection--" + testfile, spec);
@ -39,12 +42,14 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testDiscordance() {
String testFile = testDir + "NA12878.hg19.example1.vcf";
String testFile = privateTestDir + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant "
+ b37hapmapGenotypes + " -disc " + testFile
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders",
1,
Arrays.asList("f64c90c4cca470f1095d9fa2062eac3e")
Arrays.asList("54289033d35d32b8ebbb38c51fbb614c")
);
spec.disableShadowBCF();
@ -57,9 +62,9 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1,
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1,
Arrays.asList("ad0514b723ee1479d861291622bd4311")
);
spec.disableShadowBCF();
executeTest("testComplexSelection--" + testfile, spec);
@ -71,9 +76,9 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
1,
Arrays.asList("b24f31db48d254d8fe15295955173486")
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
1,
Arrays.asList("bc0e00d0629b2bc6799e7e9db0dc775c")
);
spec.disableShadowBCF();
@ -83,12 +88,14 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testConcordance() {
String testFile = testDir + "NA12878.hg19.example1.vcf";
String testFile = privateTestDir + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s --no_cmdline_in_header",
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc "
+ b37hapmapGenotypes + " --variant " + testFile
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders",
1,
Arrays.asList("9da5dab3d344c1c0a5987b15e60fa082")
Arrays.asList("946e7f2e0ae08dc0e931c1634360fc46")
);
spec.disableShadowBCF();
@ -97,12 +104,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testVariantTypeSelection() {
String testFile = testDir + "complexExample1.vcf";
String testFile = privateTestDir + "complexExample1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("30b89b3a6706f7f46b23bfb3be69cc8e")
Arrays.asList("a111642779b05de33ad04073d6022c21")
);
executeTest("testVariantTypeSelection--" + testFile, spec);
@ -110,12 +117,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testUsingDbsnpName() {
String testFile = testDir + "combine.3.vcf";
String testFile = privateTestDir + "combine.3.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("8bf557aaa07eccb294c81f491225bf9e")
Arrays.asList("d12ae1617deb38f5ed712dc326935b9a")
);
executeTest("testUsingDbsnpName--" + testFile, spec);
@ -123,12 +130,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testRegenotype() {
String testFile = testDir + "combine.3.vcf";
String testFile = privateTestDir + "combine.3.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("be38bdc7bd88f5d09cf1a9d55cfecb0b")
Arrays.asList("c22ad8864d9951403672a24c20d6c3c2")
);
executeTest("testRegenotype--" + testFile, spec);
@ -136,12 +143,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testMultipleRecordsAtOnePosition() {
String testFile = testDir + "selectVariants.onePosition.vcf";
String testFile = privateTestDir + "selectVariants.onePosition.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("cb9932f9a7aa2e53af605b30d88ad43f")
Arrays.asList("44f7c47395ca5b2afef5313f592c8cea")
);
executeTest("testMultipleRecordsAtOnePosition--" + testFile, spec);
@ -149,12 +156,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testNoGTs() {
String testFile = testDir + "vcf4.1.example.vcf";
String testFile = privateTestDir + "vcf4.1.example.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("920605cc2182026e3f54c009f6a04141")
Arrays.asList("a0b7f77edc16df0992d2c1363136a17e")
);
executeTest("testNoGTs--" + testFile, spec);
@ -167,9 +174,9 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("ad0514b723ee1479d861291622bd4311")
);
spec.disableShadowBCF();
executeTest("testParallelization (2 threads)--" + testfile, spec);
@ -177,13 +184,13 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test(enabled = false)
public void testParallelization4() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec;
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("ad0514b723ee1479d861291622bd4311")
);
spec.disableShadowBCF();
@ -192,13 +199,37 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
@Test
public void testSelectFromMultiAllelic() {
String testfile = testDir + "multi-allelic.bi-allelicInGIH.vcf";
String samplesFile = testDir + "GIH.samples.list";
String testfile = privateTestDir + "multi-allelic.bi-allelicInGIH.vcf";
String samplesFile = privateTestDir + "GIH.samples.list";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
1,
Arrays.asList("2f2a342812ba914bcce666e42ef761d7")
Arrays.asList("9acd6effcc78bfb832bed5edfd6a1b5b")
);
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
}
/**
 * Runs the shared missing-header-line scenario and expects it to fail:
 * UserException.class is passed as the expected exception, so the helper
 * does not append the allowMissingVCFHeaders flag to the command line.
 */
@Test()
public void testFileWithoutInfoLineInHeader() {
testFileWithoutInfoLineInHeader("testFileWithoutInfoLineInHeader", UserException.class);
}
/**
 * Runs the shared missing-header-line scenario with no expected exception:
 * passing null makes the helper append the allowMissingVCFHeaders flag and
 * build a spec that expects one output file instead of a failure.
 */
@Test()
public void testFileWithoutInfoLineInHeaderWithOverride() {
testFileWithoutInfoLineInHeader("testFileWithoutInfoLineInHeaderWithOverride", null);
}
/**
 * Shared driver for the two missingHeaderLine.vcf tests.
 *
 * @param name              test name handed to executeTest
 * @param expectedException exception class the run is expected to raise, or null
 *                          when the run is expected to succeed; in the null case the
 *                          allowMissingVCFHeaders flag is appended to the command line
 */
private void testFileWithoutInfoLineInHeader(final String name, final Class expectedException) {
    final boolean expectSuccess = expectedException == null;
    final String testFile = privateTestDir + "missingHeaderLine.vcf";

    String cmd = "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp "
            + testFile + " -o %s --no_cmdline_in_header";
    if (expectSuccess) {
        cmd += " -allowMissingVCFHeaders";
    }

    final WalkerTestSpec spec;
    if (expectSuccess) {
        // Success path: one output file, registered with an empty MD5 string.
        spec = new WalkerTestSpec(cmd, 1, Arrays.asList(""));
    } else {
        spec = new WalkerTestSpec(cmd, 1, expectedException);
    }
    spec.disableShadowBCF();

    executeTest(name, spec);
}
}

View File

@ -46,7 +46,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
// Copy VCF data from the test file into the FIFO.
String testFile = testDir + "yri.trio.gatk.ug.head.vcf";
String testFile = privateTestDir + "yri.trio.gatk.ug.head.vcf";
FileInputStream inputStream = new FileInputStream(testFile);
FileOutputStream outputStream = new FileOutputStream(tmpFifo);
outputStream.getChannel().transferFrom(inputStream.getChannel(),0,inputStream.getChannel().size());
@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" --no_cmdline_in_header " +
" -o %s",
1,
Arrays.asList("b532a20b5af4e8ea7a073888976c71ba")
Arrays.asList("2cdcd9e140eb1b6da7e365e37dd7d859")
);
executeTest("testSimpleVCFStreaming", spec);
@ -74,7 +74,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
File tmpFifo = File.createTempFile("vcfstreaming","");
Runtime.getRuntime().exec(new String[] {"mkfifo",tmpFifo.getAbsolutePath()});
String testFile = testDir + "yri.trio.gatk.ug.head.vcf";
String testFile = privateTestDir + "yri.trio.gatk.ug.head.vcf";
// Output select to FIFO
WalkerTestSpec selectTestSpec = new WalkerTestSpec(

View File

@ -34,7 +34,7 @@ import java.util.Arrays;
public class ValidateVariantsIntegrationTest extends WalkerTest {
public static String baseTestString(String file, String type) {
return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + testDir + file + " --validationType " + type;
return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type;
}
@Test

View File

@ -33,7 +33,7 @@ import java.util.*;
public class VariantsToTableIntegrationTest extends WalkerTest {
private String variantsToTableCmd(String moreArgs) {
return "-R " + hg18Reference +
" --variant:vcf " + testDir + "soap_gatk_annotated.vcf" +
" --variant:vcf " + privateTestDir + "soap_gatk_annotated.vcf" +
" -T VariantsToTable" +
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" +
" -L chr1 -o %s" + moreArgs;
@ -41,7 +41,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
private String variantsToTableMultiAllelicCmd(String moreArgs) {
return "-R " + b37KGReference +
" --variant " + testDir + "multiallelic.vcf" +
" --variant " + privateTestDir + "multiallelic.vcf" +
" -T VariantsToTable" +
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F MULTI-ALLELIC -F AC -F AF" +
" -o %s" + moreArgs;
@ -78,7 +78,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
public void testGenotypeFields() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --variant " + testDir + "vcfexample2.vcf" +
" --variant " + privateTestDir + "vcfexample2.vcf" +
" -T VariantsToTable" +
" -GF RD" +
" -o %s",
@ -91,7 +91,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
public void testGenotypeFieldsWithInline() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --variant " + testDir + "vcfexample2.vcf" +
" --variant " + privateTestDir + "vcfexample2.vcf" +
" -T VariantsToTable" +
" -GF RD -GF GT -GF GQ" +
" -o %s",
@ -104,7 +104,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
public void testMoltenOutput() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --variant " + testDir + "vcfexample2.vcf" +
" --variant " + privateTestDir + "vcfexample2.vcf" +
" -T VariantsToTable" +
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER" +
" --moltenize" +
@ -118,7 +118,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
public void testMoltenOutputWithGenotypeFields() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --variant " + testDir + "vcfexample2.vcf" +
" --variant " + privateTestDir + "vcfexample2.vcf" +
" -T VariantsToTable" +
" -GF RD" +
" --moltenize" +
@ -132,7 +132,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
public void testMoltenOutputWithMultipleAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b37KGReference +
" --variant " + testDir + "multiallelic.vcf" +
" --variant " + privateTestDir + "multiallelic.vcf" +
" -T VariantsToTable" +
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F MULTI-ALLELIC -F AC -F AF" +
" --moltenize -SMA" +

View File

@ -19,7 +19,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingDbsnpInput() {
List<String> md5 = new ArrayList<String>();
md5.add("268c116f825c2a4b5200a416ca587adc");
md5.add("72e6ce7aff7dec7ca9e7580be7ddd435");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@ -36,7 +36,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("82ca5ecef2df5d64dee9ef5a4b14ef2f");
md5.add("22373883afa2221b5a4f75a50f30f26b");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@ -54,7 +54,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("90bc2e21d633fa6c3c47c6bd86c134a0");
md5.add("738eb66dbc400dcd1786cd9e49902e8c");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@ -72,7 +72,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingHapMapInput() {
List<String> md5 = new ArrayList<String>();
md5.add("bb71dabd072a679cc85fe8d3e130fb2b");
md5.add("67656672acc264156f5a3e01e5cac61a");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
@ -89,11 +89,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("ae39e2249bc20fcd0a668a7fe5fb84b0");
md5.add("95898aad8c9f9515c0e668e2fb65a024");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --variant:VCF " + testDir + "complexExample.vcf4" +
" --variant:VCF " + privateTestDir + "complexExample.vcf4" +
" -T VariantsToVCF" +
" -o %s" +
" --no_cmdline_in_header",

View File

@ -45,7 +45,7 @@ import java.util.List;
public class ReadClipperUnitTest extends BaseTest {
List<Cigar> cigarList;
int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
@BeforeClass
public void init() {
@ -92,22 +92,15 @@ public class ReadClipperUnitTest extends BaseTest {
int start = read.getSoftStart();
int stop = read.getSoftEnd();
// System.out.println(String.format("CIGAR: %s (%d, %d)", cigar.toString(), start, stop));
// if (ReadUtils.readIsEntirelyInsertion(read))
// System.out.println("debug");
for (int i = start; i <= stop; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
if (!clipLeft.isEmpty()) {
// System.out.println(String.format("\t left [%d] %s -> %s ", i-start+1, cigar.toString(), clipLeft.getCigarString()));
Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
assertUnclippedLimits(read, clipLeft);
}
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
// System.out.println(String.format("\t right [%d] %s -> %s ", i-start+1, cigar.toString(), clipRight.getCigarString()));
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
assertUnclippedLimits(read, clipRight);
}
@ -121,7 +114,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
for (int i = alnStart; i <= alnEnd; i++) {
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
@ -141,7 +134,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
for (int i = alnStart; i <= alnEnd; i++) {
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
@ -165,7 +158,7 @@ public class ReadClipperUnitTest extends BaseTest {
byte[] quals = new byte[readLength];
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
quals[addLeft] = LOW_QUAL;
read.setBaseQualities(quals);
@ -252,7 +245,7 @@ public class ReadClipperUnitTest extends BaseTest {
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
if (leadingSoftClips > 0 || tailSoftClips > 0) {
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
@ -265,6 +258,25 @@ public class ReadClipperUnitTest extends BaseTest {
}
}
/**
 * For every cigar in cigarList: builds a read, reverts its soft clips, and checks
 * that the unclipped limits are unchanged and that Cigar.isValid(null, -1) returns
 * null for both the original and the reverted read. Reads with no leading or
 * trailing soft clip must keep their cigar string unchanged.
 *
 * NOTE(review): despite the name, no threshold argument is passed to
 * ReadClipper.revertSoftClippedBases here -- confirm whether a threshold overload
 * was meant to be exercised.
 */
@Test(enabled = true)
public void testRevertSoftClippedBasesWithThreshold() {
    for (final Cigar cigar : cigarList) {
        final int softClipsAtStart = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
        final int softClipsAtEnd = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
        final boolean hasSoftClips = softClipsAtStart > 0 || softClipsAtEnd > 0;

        final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
        final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read);

        // Reverting soft clips must not move the unclipped limits.
        assertUnclippedLimits(read, reverted);
        Assert.assertNull(read.getCigar().isValid(null, -1));
        Assert.assertNull(reverted.getCigar().isValid(null, -1));

        if (!hasSoftClips) {
            Assert.assertEquals(read.getCigarString(), reverted.getCigarString());
        }
    }
}
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
if (!read.isEmpty()) {

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -38,9 +39,9 @@ import java.io.IOException;
/**
* Unit tests for the HapMap codec
*/
public class HapMapUnitTest {
public class HapMapUnitTest extends BaseTest {
// our sample hapmap file
private final static File hapMapFile = new File("public/testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
private final static File hapMapFile = new File(privateTestDir + "genotypes_chr1_ASW_phase3.3_first500.hapmap");
private final static String knownLine = "rs2185539 C/T chr1 556738 + ncbi_b36 bbs urn:lsid:bbs.hapmap.org:Protocol:Phase3.r3:1 urn:lsid:bbs.hapmap.org:Assay:Phase3.r3_r" +
"s2185539:1 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:4 QC+ CC TC TT CT CC CC CC CC CC CC CC CC CC";
/**

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import org.testng.annotations.BeforeTest;
@ -19,14 +20,15 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
/**
* tests out the various functions in the index factory class
*/
public class IndexFactoryUnitTest extends BaseTest {
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
File inputFile = new File(privateTestDir + "HiSeq.10000.vcf");
File outputFile = new File(privateTestDir + "onTheFlyOutputTest.vcf");
File outputFileIndex = Tribble.indexFile(outputFile);
private SAMSequenceDictionary dict;
@ -56,7 +58,8 @@ public class IndexFactoryUnitTest extends BaseTest {
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), new VCFCodec(), indexFromInputFile);
int counter = 0;
VariantContextWriter writer = VariantContextWriterFactory.create(outputFile, dict);
final EnumSet<Options> options = EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
VariantContextWriter writer = VariantContextWriterFactory.create(outputFile, dict, options);
writer.writeHeader((VCFHeader)source.getHeader());
CloseableTribbleIterator<VariantContext> it = source.iterator();
while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {

View File

@ -13,7 +13,7 @@ public class VCFIntegrationTest extends WalkerTest {
public void testReadingAndWritingWitHNoChanges() {
String md5ofInputVCF = "babf02baabcfa7f72a2c6f7da5fdc996";
String testVCF = testDir + "vcf4.1.example.vcf";
String testVCF = privateTestDir + "vcf4.1.example.vcf";
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
@ -26,27 +26,36 @@ public class VCFIntegrationTest extends WalkerTest {
executeTest("Test Variants To VCF from new output", spec2);
}
@Test
@Test(enabled = false)
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
public void testReadingAndWritingBreakpointAlleles() {
String testVCF = testDir + "breakpoint-example.vcf";
String testVCF = privateTestDir + "breakpoint-example.vcf";
//String testVCF = validationDataLocation + "multiallelic.vcf";
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("d2604faad0613932453395c54cc68369"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("355b029487c3b4c499140d71310ca37e"));
executeTest("Test reading and writing breakpoint VCF", spec1);
}
@Test
public void testReadingAndWritingSamtools() {
String testVCF = testDir + "samtools.vcf";
String testVCF = privateTestDir + "samtools.vcf";
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0807ff11bebba81b87a273ad6bee01a8"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae"));
executeTest("Test reading and writing samtools vcf", spec1);
}
/**
 * Runs SelectVariants over ex2.vcf from the private test directory against the
 * b36 reference and compares the single output file with the recorded MD5.
 */
@Test
public void testReadingAndWritingSamtoolsWExBCFExample() {
    final String inputVCF = privateTestDir + "ex2.vcf";
    final String command = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "
            + "-T SelectVariants -V " + inputVCF;

    final WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120"));
    executeTest("Test reading and writing samtools WEx vcf/BCF example", spec);
}
}

View File

@ -0,0 +1,148 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Unit tests for VCFStandardHeaderLines: lookup of standard INFO and FORMAT
* header lines by key, plus header-line repair cases.
*
* User: aaron
* Date: Jun 30, 2010
* Time: 3:32:08 PM
*/
public class VCFStandardHeaderLinesUnitTest extends BaseTest {
/**
 * Supplies the rows for the "getStandardLines" data provider.
 *
 * @return one row per lookup case, laid out as {key, line type, expectedToBeStandard},
 *         where the line type is either "info" or "format"
 */
@DataProvider(name = "getStandardLines")
public Object[][] makeGetStandardLines() {
    return new Object[][]{
            // info
            {"AC", "info", true},
            {"AN", "info", true},
            {"AF", "info", true},
            {"DP", "info", true},
            {"DB", "info", true},
            {"END", "info", true},

            // format
            {"GT", "format", true},
            {"GQ", "format", true},
            {"DP", "format", true},
            {"AD", "format", true},
            {"PL", "format", true},

            // keys that are not expected to be standard for either line type
            {"NOT_STANDARD", "info", false},
            {"NOT_STANDARD", "format", false},
    };
}
@Test(dataProvider = "getStandardLines")
public void getStandardLines(final String key, final String type, final boolean expectedToBeStandard) {
VCFCompoundHeaderLine line = null;
if ( type.equals("info") )
line = VCFStandardHeaderLines.getInfoLine(key, false);
else if ( type.equals("format") )
line = VCFStandardHeaderLines.getFormatLine(key, false);
else
throw new IllegalArgumentException("Unexpected type in getStandardLines " + type);
if ( expectedToBeStandard ) {
Assert.assertNotNull(line);
Assert.assertEquals(line.getID(), key);
} else
Assert.assertNull(line);
}
private class RepairHeaderTest extends TestDataProvider {
final VCFCompoundHeaderLine original, expectedResult;
private RepairHeaderTest(final VCFCompoundHeaderLine original) {
this(original, original);
}
private RepairHeaderTest(final VCFCompoundHeaderLine original, final VCFCompoundHeaderLine expectedResult) {
super(RepairHeaderTest.class);
this.original = original;
this.expectedResult = expectedResult;
}
}
@DataProvider(name = "RepairHeaderTest")
public Object[][] makeRepairHeaderTest() {
final VCFInfoHeaderLine standardAC = VCFStandardHeaderLines.getInfoLine("AC");
final VCFInfoHeaderLine goodAC = new VCFInfoHeaderLine("AC", VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "x");
final VCFFormatHeaderLine standardGT = VCFStandardHeaderLines.getFormatLine("GT");
final VCFFormatHeaderLine goodGT = new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "x");
new RepairHeaderTest( standardGT, standardGT);
new RepairHeaderTest( goodGT, goodGT );
new RepairHeaderTest( new VCFFormatHeaderLine("GT", 2, VCFHeaderLineType.String, "x"), standardGT);
new RepairHeaderTest( new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.Integer, "x"), standardGT);
new RepairHeaderTest( new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.Float, "x"), standardGT);
new RepairHeaderTest( new VCFFormatHeaderLine("GT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "x"), standardGT);
new RepairHeaderTest( new VCFFormatHeaderLine("GT", VCFHeaderLineCount.G, VCFHeaderLineType.String, "x"), standardGT);
new RepairHeaderTest( new VCFFormatHeaderLine("GT", VCFHeaderLineCount.A, VCFHeaderLineType.String, "x"), standardGT);
new RepairHeaderTest( standardAC, standardAC);
new RepairHeaderTest( goodAC, goodAC );
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 1, VCFHeaderLineType.Integer, "x"), standardAC);
new RepairHeaderTest( new VCFInfoHeaderLine("AC", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "x"), standardAC);
new RepairHeaderTest( new VCFInfoHeaderLine("AC", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"), standardAC);
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 1, VCFHeaderLineType.Float, "x"), standardAC);
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 1, VCFHeaderLineType.String, "x"), standardAC);
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 0, VCFHeaderLineType.Flag, "x"), standardAC);
new RepairHeaderTest( new VCFInfoHeaderLine("NON_STANDARD_INFO", 1, VCFHeaderLineType.String, "x"));
new RepairHeaderTest( new VCFFormatHeaderLine("NON_STANDARD_FORMAT", 1, VCFHeaderLineType.String, "x"));
return RepairHeaderTest.getTests(RepairHeaderTest.class);
}
@Test(dataProvider = "RepairHeaderTest")
public void testRepairHeaderTest(RepairHeaderTest cfg) {
final VCFHeader toRepair = new VCFHeader(Collections.singleton((VCFHeaderLine)cfg.original));
final VCFHeader repaired = VCFStandardHeaderLines.repairStandardHeaderLines(toRepair);
VCFCompoundHeaderLine repairedLine = (VCFCompoundHeaderLine)repaired.getFormatHeaderLine(cfg.original.getID());
if ( repairedLine == null ) repairedLine = (VCFCompoundHeaderLine)repaired.getInfoHeaderLine(cfg.original.getID());
Assert.assertNotNull(repairedLine, "Repaired header didn't contain the expected line");
Assert.assertEquals(repairedLine.getID(), cfg.expectedResult.getID());
Assert.assertEquals(repairedLine.getType(), cfg.expectedResult.getType());
Assert.assertEquals(repairedLine.getCountType(), cfg.expectedResult.getCountType());
if ( repairedLine.getCountType() == VCFHeaderLineCount.INTEGER )
Assert.assertEquals(repairedLine.getCount(), cfg.expectedResult.getCount());
}
}

Some files were not shown because too many files have changed in this diff Show More