Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
0650b349d7
2
ivy.xml
2
ivy.xml
|
|
@ -59,7 +59,7 @@
|
|||
|
||||
<!-- Commons Dependencies -->
|
||||
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
|
||||
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
|
||||
<dependency org="org.apache.commons" name="commons-jexl" rev="2.1.1"/>
|
||||
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
|
||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||
<dependency org="commons-io" name="commons-io" rev="2.1"/>
|
||||
|
|
|
|||
|
|
@ -347,6 +347,9 @@ public class GATKArgumentCollection {
|
|||
public boolean USE_SLOW_GENOTYPES = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
@Argument(fullName="allowMissingVCFHeaders",shortName = "allowMissingVCFHeaders",doc="If provided, the GATK will write out VCF files that contain INFO, FILTER, and FORMAT fields not found in the VCF header",required=false)
|
||||
public boolean allowMissingVCFHeaders = false;
|
||||
|
||||
/**
|
||||
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
|
||||
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
|
||||
|
|
|
|||
|
|
@ -74,7 +74,8 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
else if ( stub.getOutputStream() != null ) {
|
||||
this.file = null;
|
||||
this.stream = stub.getOutputStream();
|
||||
writer = VariantContextWriterFactory.create(stream, stub.getMasterSequenceDictionary(), stub.getWriterOptions(false));
|
||||
writer = VariantContextWriterFactory.create(stream,
|
||||
stub.getMasterSequenceDictionary(), stub.getWriterOptions(false));
|
||||
}
|
||||
else
|
||||
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
|
|
|
|||
|
|
@ -183,6 +183,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
List<Options> options = new ArrayList<Options>();
|
||||
|
||||
if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
|
||||
if ( engine.getArguments().allowMissingVCFHeaders ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
|
||||
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
|
||||
|
||||
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);
|
||||
|
|
|
|||
|
|
@ -55,9 +55,10 @@ import java.util.*;
|
|||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
public static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||
public static final VCFInfoHeaderLine[] descriptions = {
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY),
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY),
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) };
|
||||
|
||||
private Set<String> founderIds = new HashSet<String>();
|
||||
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -68,5 +68,7 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,10 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
|
@ -136,11 +133,6 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.GENOTYPE_ALLELE_DEPTHS); }
|
||||
|
||||
public List<VCFFormatHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
new VCFFormatHeaderLine(
|
||||
getKeyNames().get(0),
|
||||
VCFHeaderLineCount.UNBOUNDED,
|
||||
VCFHeaderLineType.Integer,
|
||||
"Allelic depths for the ref and alt alleles in the order listed"));
|
||||
return Arrays.asList(VCFStandardHeaderLines.getFormatLine(getKeyNames().get(0)));
|
||||
}
|
||||
}
|
||||
|
|
@ -7,8 +7,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -44,5 +44,7 @@ public class MappingQualityZero extends InfoFieldAnnotation implements StandardA
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
}
|
||||
|
|
@ -63,7 +63,9 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -85,5 +85,7 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn
|
|||
|
||||
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
|
||||
}
|
||||
}
|
||||
|
|
@ -68,9 +68,10 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
}
|
||||
|
||||
public static final String[] keyNames = {STR_PRESENT, REPEAT_UNIT_KEY,REPEATS_PER_ALLELE_KEY };
|
||||
public static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(STR_PRESENT, 1, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
|
||||
public static final VCFInfoHeaderLine[] descriptions = {
|
||||
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
|
||||
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
||||
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(keyNames);
|
||||
|
|
|
|||
|
|
@ -41,8 +41,8 @@ import java.util.*;
|
|||
|
||||
public class VariantAnnotatorEngine {
|
||||
|
||||
private List<InfoFieldAnnotation> requestedInfoAnnotations;
|
||||
private List<GenotypeAnnotation> requestedGenotypeAnnotations;
|
||||
private List<InfoFieldAnnotation> requestedInfoAnnotations = Collections.emptyList();
|
||||
private List<GenotypeAnnotation> requestedGenotypeAnnotations = Collections.emptyList();
|
||||
private List<VAExpression> requestedExpressions = new ArrayList<VAExpression>();
|
||||
|
||||
private final HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
|
||||
|
|
@ -164,8 +164,12 @@ public class VariantAnnotatorEngine {
|
|||
descriptions.addAll(annotation.getDescriptions());
|
||||
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations )
|
||||
descriptions.addAll(annotation.getDescriptions());
|
||||
for ( String db : dbAnnotations.values() )
|
||||
descriptions.add(new VCFInfoHeaderLine(db, 0, VCFHeaderLineType.Flag, (db.equals(VCFConstants.DBSNP_KEY) ? "dbSNP" : db) + " Membership"));
|
||||
for ( String db : dbAnnotations.values() ) {
|
||||
if ( VCFStandardHeaderLines.getInfoLine(db, false) != null )
|
||||
descriptions.add(VCFStandardHeaderLines.getInfoLine(db));
|
||||
else
|
||||
descriptions.add(new VCFInfoHeaderLine(db, 0, VCFHeaderLineType.Flag, db + " Membership"));
|
||||
}
|
||||
|
||||
return descriptions;
|
||||
}
|
||||
|
|
@ -203,8 +207,9 @@ public class VariantAnnotatorEngine {
|
|||
// go through all the requested info annotationTypes
|
||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
Map<String, Object> annotationsFromCurrentType = ((ActiveRegionBasedAnnotation)annotationType).annotate(stratifiedContexts, vc);
|
||||
if ( annotationsFromCurrentType != null )
|
||||
if ( annotationsFromCurrentType != null ) {
|
||||
infoAnnotations.putAll(annotationsFromCurrentType);
|
||||
}
|
||||
}
|
||||
|
||||
// generate a new annotated VC
|
||||
|
|
@ -216,11 +221,11 @@ public class VariantAnnotatorEngine {
|
|||
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
|
||||
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
|
||||
|
||||
// put the DB key into the INFO field
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
|
||||
|
||||
// add the ID if appropriate
|
||||
if ( rsID != null ) {
|
||||
// put the DB key into the INFO field
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, true);
|
||||
|
||||
if ( vc.emptyID() ) {
|
||||
vc = new VariantContextBuilder(vc).id(rsID).make();
|
||||
} else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
|
||||
|
|
@ -236,7 +241,8 @@ public class VariantAnnotatorEngine {
|
|||
break;
|
||||
}
|
||||
}
|
||||
infoAnnotations.put(dbSet.getValue(), overlapsComp);
|
||||
if ( overlapsComp )
|
||||
infoAnnotations.put(dbSet.getValue(), overlapsComp);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -128,13 +128,13 @@ class ThresHolder {
|
|||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||
headerLines.add(new VCFInfoHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("AVG_INTERVAL_DP", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||
|
|
|
|||
|
|
@ -65,6 +65,7 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
br.close();
|
||||
|
||||
// must be read as state is stored in reader itself
|
||||
AbstractVCFCodec.disableOnTheFlyModifications();
|
||||
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
VCFHeader header = (VCFHeader)reader.getHeader();
|
||||
for ( VCFHeaderLine headerLine : header.getMetaData() ) {
|
||||
|
|
@ -97,7 +98,9 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
vcRoot.add("REF", vc.getReference());
|
||||
vcRoot.add("ALT", vc.getAlternateAlleles());
|
||||
vcRoot.add("QUAL", vc.hasLog10PError() ? vc.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4);
|
||||
vcRoot.add("FILTER", vc.getFilters());
|
||||
vcRoot.add("FILTER", ! vc.filtersWereApplied() // needs null to differentiate between PASS and .
|
||||
? VCFConstants.MISSING_VALUE_v4
|
||||
: ( vc.getFilters().isEmpty() ? VCFConstants.PASSES_FILTERS_v4 : vc.getFilters()) );
|
||||
|
||||
// add info fields
|
||||
for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {
|
||||
|
|
|
|||
|
|
@ -56,9 +56,10 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
BOTH,
|
||||
POOLSNP,
|
||||
POOLINDEL,
|
||||
BOTH
|
||||
POOLBOTH
|
||||
}
|
||||
|
||||
public enum GENOTYPING_MODE {
|
||||
|
|
|
|||
|
|
@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the header
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo(UAC, annotationEngine, dbsnp);
|
||||
|
||||
// invoke initialize() method on each of the annotation classes, allowing them to add their own header lines
|
||||
// and perform any necessary initialization/validation steps
|
||||
|
|
@ -250,49 +250,45 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
writer.writeHeader(new VCFHeader(headerInfo, samples));
|
||||
}
|
||||
|
||||
private Set<VCFHeaderLine> getHeaderInfo() {
|
||||
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
|
||||
final VariantAnnotatorEngine annotationEngine,
|
||||
final DbsnpArgumentCollection dbsnp) {
|
||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// all annotation fields from VariantAnnotatorEngine
|
||||
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
if ( annotationEngine != null )
|
||||
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
if ( !UAC.NO_SLOD )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY);
|
||||
|
||||
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
|
||||
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
|
||||
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true,
|
||||
VCFConstants.DOWNSAMPLED_KEY,
|
||||
VCFConstants.MLE_ALLELE_COUNT_KEY,
|
||||
VCFConstants.MLE_ALLELE_FREQUENCY_KEY);
|
||||
|
||||
// also, check to see whether comp rods were included
|
||||
if ( dbsnp.dbsnp.isBound() )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership"));
|
||||
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.DBSNP_KEY);
|
||||
|
||||
// FORMAT fields
|
||||
headerInfo.addAll(getSupportedHeaderStrings());
|
||||
VCFStandardHeaderLines.addStandardFormatLines(headerInfo, true,
|
||||
VCFConstants.GENOTYPE_KEY,
|
||||
VCFConstants.GENOTYPE_QUALITY_KEY,
|
||||
VCFConstants.DEPTH_KEY,
|
||||
VCFConstants.GENOTYPE_PL_KEY);
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
|
||||
// where the filters are used. For example, in emitting all sites the lowQual field is used
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
|
||||
return headerInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
||||
* @return a set of VCF format lines
|
||||
*/
|
||||
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute at a given locus.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -316,7 +316,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
// first, the basic info
|
||||
headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
|
||||
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
headerInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
|
||||
|
||||
// FORMAT and INFO fields
|
||||
// headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
|
|
|
|||
|
|
@ -198,8 +198,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// Variables
|
||||
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
||||
|
||||
private Set<String> sampleNamesForEvaluation = new TreeSet<String>();
|
||||
private Set<String> sampleNamesForStratification = new TreeSet<String>();
|
||||
private boolean isSubsettingSamples;
|
||||
private Set<String> sampleNamesForEvaluation = new LinkedHashSet<String>();
|
||||
private Set<String> sampleNamesForStratification = new LinkedHashSet<String>();
|
||||
|
||||
// important stratifications
|
||||
private boolean byFilterIsEnabled = false;
|
||||
|
|
@ -249,8 +250,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
||||
// Load the sample list
|
||||
sampleNamesForEvaluation.addAll(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS));
|
||||
// Load the sample list, using an intermediate tree set to sort the samples
|
||||
final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples);
|
||||
sampleNamesForEvaluation.addAll(new TreeSet<String>(SampleUtils.getSamplesFromCommandLineInput(vcfSamples, SAMPLE_EXPRESSIONS)));
|
||||
isSubsettingSamples = ! sampleNamesForEvaluation.containsAll(allSampleNames);
|
||||
|
||||
if (Arrays.asList(STRATIFICATIONS_TO_USE).contains("Sample")) {
|
||||
sampleNamesForStratification.addAll(sampleNamesForEvaluation);
|
||||
|
|
@ -571,6 +574,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
|
||||
public List<RodBinding<VariantContext>> getEvals() { return evals; }
|
||||
|
||||
public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
|
||||
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
|
||||
|
||||
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
|
||||
|
|
|
|||
|
|
@ -28,8 +28,6 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.StandardEval;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
|
|
@ -37,13 +35,13 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Require
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.*;
|
||||
|
||||
public class VariantEvalUtils {
|
||||
|
|
@ -199,18 +197,32 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
VariantContext vcsub = vc.subContextFromSamples(sampleNames, false);
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
|
||||
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
|
||||
}
|
||||
|
||||
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
|
||||
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
|
||||
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
|
||||
final boolean isSingleton = originalAlleleCount == newAlleleCount && newAlleleCount == 1;
|
||||
final boolean hasChrCountAnnotations = vcsub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
|
||||
vcsub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
|
||||
vcsub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);
|
||||
|
||||
if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
|
||||
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
|
||||
if ( ! isSingleton && hasChrCountAnnotations ) {
|
||||
// nothing to update
|
||||
return vcsub;
|
||||
} else {
|
||||
// have to do the work
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
|
||||
|
||||
if ( isSingleton )
|
||||
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
|
||||
|
||||
if ( ! hasChrCountAnnotations )
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -250,8 +262,11 @@ public class VariantEvalUtils {
|
|||
// First, filter the VariantContext to represent only the samples for evaluation
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
if (subsetBySample && vc.hasGenotypes() && vc.hasGenotypes(variantEvalWalker.getSampleNamesForEvaluation())) {
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
if (subsetBySample && vc.hasGenotypes()) {
|
||||
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
else
|
||||
vcsub = ensureAnnotations(vc, vc);
|
||||
}
|
||||
|
||||
if ((byFilter || !vcsub.isFiltered())) {
|
||||
|
|
|
|||
|
|
@ -150,8 +150,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
// setup the header fields
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
|
||||
addVQSRStandardHeaderLines(hInfo);
|
||||
final TreeSet<String> samples = new TreeSet<String>();
|
||||
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames));
|
||||
|
||||
|
|
@ -173,6 +172,12 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
|
|||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
public static final void addVQSRStandardHeaderLines(final Set<VCFHeaderLine> hInfo) {
|
||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"));
|
||||
hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.CULPRIT_KEY, 1, VCFHeaderLineType.String, "The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// map
|
||||
|
|
|
|||
|
|
@ -37,7 +37,9 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.io.Resource;
|
||||
|
|
@ -229,7 +231,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
|
||||
recalWriter.writeHeader( new VCFHeader() );
|
||||
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
ApplyRecalibration.addVQSRStandardHeaderLines(hInfo);
|
||||
recalWriter.writeHeader( new VCFHeader(hInfo) );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
.attribute("OriginalStart", fromInterval.getStart()).make();
|
||||
}
|
||||
|
||||
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
|
||||
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
|
||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
||||
|
|
|
|||
|
|
@ -34,9 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -202,6 +200,9 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
|
|||
// Remove any excluded headers.
|
||||
if (XLheaderNames != null)
|
||||
selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true);
|
||||
|
||||
// always include the contig lines
|
||||
selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary());
|
||||
return selectedHeaders;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -427,13 +428,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||
|
||||
if (KEEP_ORIGINAL_CHR_COUNTS) {
|
||||
headerLines.add(new VCFFormatHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
|
||||
headerLines.add(new VCFFormatHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
|
||||
headerLines.add(new VCFFormatHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
|
||||
headerLines.add(new VCFInfoHeaderLine("AC_Orig", 1, VCFHeaderLineType.Integer, "Original AC"));
|
||||
headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
|
||||
headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
|
||||
}
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
|
||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Depth of coverage"));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
|
||||
|
||||
for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {
|
||||
// It's not necessary that the user supply select names for the JEXL expressions, since those
|
||||
|
|
@ -469,6 +469,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||
UAC.NO_SLOD = true;
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
|
||||
}
|
||||
|
||||
/** load in the IDs file to a hashset for matching */
|
||||
|
|
@ -483,6 +484,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
throw new UserException.CouldNotReadInputFile(rsIDFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -170,8 +170,8 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
|
|||
hInfo.add(new VCFInfoHeaderLine("HetPct", 1, VCFHeaderLineType.Float, "Percent of heterozygous genotypes"));
|
||||
hInfo.add(new VCFInfoHeaderLine("HomVarPct", 1, VCFHeaderLineType.Float, "Percent homozygous variant genotypes"));
|
||||
hInfo.add(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled Hardy-Weinberg violation p-value"));
|
||||
hInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
|
||||
hInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
|
||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
|
||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
|
||||
hInfo.add(new VCFFilterHeaderLine("HardyWeinbergViolation", "The validation is in Hardy-Weinberg violation"));
|
||||
hInfo.add(new VCFFilterHeaderLine("HighNoCallRate", "The validation no-call rate is too high"));
|
||||
hInfo.add(new VCFFilterHeaderLine("TooManyHomVars", "The validation homozygous variant rate is too high"));
|
||||
|
|
|
|||
|
|
@ -232,7 +232,8 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
firstEntry = false;
|
||||
else
|
||||
sb.append("\t");
|
||||
sb.append(sample);
|
||||
// spaces in sample names are legal but wreak havoc in R data frames
|
||||
sb.append(sample.replace(" ","_"));
|
||||
sb.append(".");
|
||||
sb.append(gf);
|
||||
}
|
||||
|
|
@ -247,7 +248,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
for ( final String sample : samples ) {
|
||||
for ( final String gf : genotypeFieldsToTake ) {
|
||||
out.println(String.format("%d\t%s\t%s\t%s", nRecords, sample, gf, record.get(index++)));
|
||||
out.println(String.format("%d\t%s\t%s\t%s", nRecords, sample.replace(" ","_"), gf, record.get(index++)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -219,8 +219,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())));
|
||||
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getID()));
|
||||
hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
|
||||
|
||||
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( VCFHeaderLine field : hInfo ) {
|
||||
|
|
|
|||
|
|
@ -150,8 +150,7 @@ public class SampleUtils {
|
|||
// iterate to get all of the sample names
|
||||
|
||||
for ( Map.Entry<String, VCFHeader> pair : VCFUtils.getVCFHeadersFromRods(toolkit).entrySet() ) {
|
||||
Set<String> vcfSamples = pair.getValue().getGenotypeSamples();
|
||||
for ( String sample : vcfSamples )
|
||||
for ( String sample : pair.getValue().getGenotypeSamples() )
|
||||
addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, pair.getKey());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -422,13 +422,24 @@ public class ReadClipper {
|
|||
/**
|
||||
* Reverts only soft clipped bases with quality score greater than or equal to minQual
|
||||
*
|
||||
* Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR')
|
||||
* todo -- Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR') -- THIS HAS BEEN REMOVED TEMPORARILY SHOULD HAPPEN INSIDE THE CLIPPING ROUTINE!
|
||||
*
|
||||
* @param read the read
|
||||
* @param minQual the minimum base quality score to revert the base (inclusive)
|
||||
* @return the read with high quality soft clips reverted
|
||||
* @return a new read with high quality soft clips reverted
|
||||
*/
|
||||
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) {
|
||||
return revertSoftClippedBases(hardClipLowQualitySoftClips(read, minQual));
|
||||
}
|
||||
|
||||
/**
|
||||
* Hard clips away soft clipped bases that are below the given quality threshold
|
||||
*
|
||||
* @param read the read
|
||||
* @param minQual the minimum base quality score to revert the base (inclusive)
|
||||
* @return a new read without low quality soft clipped bases
|
||||
*/
|
||||
public static GATKSAMRecord hardClipLowQualitySoftClips(GATKSAMRecord read, byte minQual) {
|
||||
int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
|
||||
if (read.isEmpty() || nLeadingSoftClips > read.getReadLength())
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
|
|
@ -457,17 +468,12 @@ public class ReadClipper {
|
|||
}
|
||||
|
||||
GATKSAMRecord clippedRead = read;
|
||||
if (right >= 0) {
|
||||
if (right + 1 < clippedRead.getReadLength())
|
||||
clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the left tail
|
||||
clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1)); // keep track of how may bases to 're-softclip' after processing
|
||||
}
|
||||
if (left >= 0) {
|
||||
if (left - 1 > 0)
|
||||
clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the right tail
|
||||
clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left); // keep track of how may bases to 're-softclip' after processing
|
||||
}
|
||||
return revertSoftClippedBases(clippedRead); // now that we have only good bases in the soft clips, we can revert them all
|
||||
if (right >= 0 && right + 1 < clippedRead.getReadLength()) // only clip if there are softclipped bases (right >= 0) and the first high quality soft clip is not the last base (right+1 < readlength)
|
||||
clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the right tail
|
||||
if (left >= 0 && left - 1 > 0) // only clip if there are softclipped bases (left >= 0) and the first high quality soft clip is not the last base (left-1 > 0)
|
||||
clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the left tail
|
||||
|
||||
return clippedRead;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
|
|
@ -79,6 +80,14 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
*/
|
||||
private BCF2GenotypeFieldDecoders gtFieldDecoders = null;
|
||||
|
||||
/**
|
||||
* A cached array of GenotypeBuilders for efficient genotype decoding.
|
||||
*
|
||||
* Caching it allows us to avoid recreating this intermediate data
|
||||
* structure each time we decode genotypes
|
||||
*/
|
||||
private GenotypeBuilder[] builders = null;
|
||||
|
||||
// for error handling
|
||||
private int recordNo = 0;
|
||||
private int pos = 0;
|
||||
|
|
@ -168,6 +177,13 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
// prepare the genotype field decoders
|
||||
gtFieldDecoders = new BCF2GenotypeFieldDecoders(header);
|
||||
|
||||
// create and initialize the genotype builder array
|
||||
final int nSamples = header.getNGenotypeSamples();
|
||||
builders = new GenotypeBuilder[nSamples];
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
builders[i] = new GenotypeBuilder(header.getGenotypeSamples().get(i));
|
||||
}
|
||||
|
||||
// position right before next line (would be right before first real record byte at end of header)
|
||||
return new FeatureCodecHeader(header, inputStream.getPosition());
|
||||
}
|
||||
|
|
@ -256,6 +272,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
final int nFormatFields = nFormatSamples >> 24;
|
||||
final int nSamples = nFormatSamples & 0x00FFFFF;
|
||||
|
||||
if ( header.getNGenotypeSamples() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + header.getNGenotypeSamples() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
decodeID(builder);
|
||||
final ArrayList<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
|
||||
decodeFilter(builder);
|
||||
|
|
@ -314,7 +335,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
*/
|
||||
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
final ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
||||
return clippedAlleles;
|
||||
} else
|
||||
|
|
@ -335,14 +356,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
String ref = null;
|
||||
|
||||
for ( int i = 0; i < nAlleles; i++ ) {
|
||||
final String allele = (String)decoder.decodeTypedValue();
|
||||
final String alleleBases = (String)decoder.decodeTypedValue();
|
||||
|
||||
if ( i == 0 ) {
|
||||
ref = allele;
|
||||
alleles.add(Allele.create(allele, true));
|
||||
} else {
|
||||
alleles.add(Allele.create(allele, false));
|
||||
}
|
||||
final boolean isRef = i == 0;
|
||||
final Allele allele = Allele.create(alleleBases, isRef);
|
||||
if ( isRef ) ref = alleleBases;
|
||||
|
||||
alleles.add(allele);
|
||||
|
||||
if ( allele.isSymbolic() )
|
||||
throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
|
||||
}
|
||||
assert ref != null;
|
||||
|
||||
|
|
@ -416,11 +439,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
final VariantContextBuilder builder ) {
|
||||
if (siteInfo.nSamples > 0) {
|
||||
final LazyGenotypesContext.LazyParser lazyParser =
|
||||
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields);
|
||||
final int nGenotypes = header.getGenotypeSamples().size();
|
||||
new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders);
|
||||
|
||||
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser,
|
||||
new LazyData(siteInfo.nFormatFields, decoder.getRecordBytes()),
|
||||
nGenotypes);
|
||||
header.getNGenotypeSamples());
|
||||
|
||||
// did we resort the sample names? If so, we need to load the genotype data
|
||||
if ( !header.samplesWereAlreadySorted() )
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new PLDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_PL_KEY, new PLDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_QUALITY_KEY, new GQDecoder());
|
||||
}
|
||||
|
||||
|
|
@ -99,21 +99,21 @@ public class BCF2GenotypeFieldDecoders {
|
|||
*/
|
||||
public interface Decoder {
|
||||
@Requires({"siteAlleles != null", "! siteAlleles.isEmpty()",
|
||||
"field != null", "decoder != null", "gbs != null", "! gbs.isEmpty()"})
|
||||
"field != null", "decoder != null", "gbs != null", "gbs.length != 0"})
|
||||
public void decode(final List<Allele> siteAlleles,
|
||||
final String field,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs);
|
||||
final GenotypeBuilder[] gbs);
|
||||
}
|
||||
|
||||
private class GTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
// we have to do a bit of low-level processing here as we want to know the size upfront
|
||||
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);
|
||||
|
||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.size() >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
||||
else {
|
||||
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
|
||||
|
|
@ -137,7 +137,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs) {
|
||||
final GenotypeBuilder[] gbs) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
final int nPossibleGenotypes = 3 * 3;
|
||||
|
|
@ -169,6 +169,9 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
gb.alleles(gt);
|
||||
}
|
||||
|
||||
final boolean phased = (a1 & 0x01) == 1;
|
||||
gb.phased(phased);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -176,7 +179,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
final int ploidy,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final List<GenotypeBuilder> gbs) {
|
||||
final GenotypeBuilder[] gbs) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
// a single cache for the encoded genotypes, since we don't actually need this vector
|
||||
|
|
@ -199,6 +202,8 @@ public class BCF2GenotypeFieldDecoders {
|
|||
gt.add(getAlleleFromEncoded(siteAlleles, encode));
|
||||
|
||||
gb.alleles(gt);
|
||||
final boolean phased = (encoded[0] & 0x01) == 1;
|
||||
gb.phased(phased);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -213,7 +218,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class DPDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
||||
|
|
@ -223,7 +228,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class GQDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
||||
|
|
@ -233,7 +238,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class ADDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.AD(decoder.decodeIntArray(typeDescriptor));
|
||||
}
|
||||
|
|
@ -242,7 +247,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class PLDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.PL(decoder.decodeIntArray(typeDescriptor));
|
||||
}
|
||||
|
|
@ -251,7 +256,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class GenericDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
if ( value != null ) { // don't add missing values
|
||||
|
|
@ -270,7 +275,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class FTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
if ( value != null ) { // don't add missing values
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -46,12 +47,16 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
|||
private final ArrayList<Allele> siteAlleles;
|
||||
private final int nSamples;
|
||||
private final int nFields;
|
||||
private final GenotypeBuilder[] builders;
|
||||
|
||||
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final ArrayList<Allele> alleles, final int nSamples, final int nFields) {
|
||||
@Requires("codec.getHeader().getNGenotypeSamples() == builders.length")
|
||||
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final ArrayList<Allele> alleles, final int nSamples,
|
||||
final int nFields, final GenotypeBuilder[] builders) {
|
||||
this.codec = codec;
|
||||
this.siteAlleles = alleles;
|
||||
this.nSamples = nSamples;
|
||||
this.nFields = nFields;
|
||||
this.builders = builders;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -62,21 +67,8 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
|||
// load our byte[] data into the decoder
|
||||
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
||||
|
||||
// TODO -- fast path for sites only
|
||||
|
||||
// go ahead and decode everyone
|
||||
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
// create and initialize the genotypes array
|
||||
final ArrayList<GenotypeBuilder> builders = new ArrayList<GenotypeBuilder>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
builders.add(new GenotypeBuilder(samples.get(i)));
|
||||
}
|
||||
for ( int i = 0; i < nSamples; i++ )
|
||||
builders[i].reset(true);
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
// get the field name
|
||||
|
|
|
|||
|
|
@ -82,18 +82,27 @@ public final class BCF2Utils {
|
|||
@Requires("header != null")
|
||||
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
final Set<String> dict = new TreeSet<String>();
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final ArrayList<String> dict = new ArrayList<String>();
|
||||
|
||||
boolean sawPASS = false;
|
||||
// set up the strings dictionary
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFIDHeaderLine) {
|
||||
VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
dict.add(idLine.getID());
|
||||
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
if ( ! seen.contains(idLine.getID())) {
|
||||
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
|
||||
dict.add(idLine.getID());
|
||||
seen.add(idLine.getID());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new ArrayList<String>(dict);
|
||||
|
||||
if ( ! sawPASS )
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
@Requires({"nElements >= 0", "type != null"})
|
||||
|
|
@ -142,25 +151,6 @@ public final class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
return (byte)(readByte(stream));
|
||||
} case 2: {
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
return (short)((b1 << 8) | b2);
|
||||
} case 4: {
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
final int b3 = readByte(stream) & 0xFF;
|
||||
final int b4 = readByte(stream) & 0xFF;
|
||||
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
|
||||
} default: throw new ReviewedStingException("Unexpected size during decoding");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Collapse multiple strings into a comma separated list
|
||||
*
|
||||
|
|
@ -174,8 +164,10 @@ public final class BCF2Utils {
|
|||
public static final String collapseStringList(final List<String> strings) {
|
||||
final StringBuilder b = new StringBuilder();
|
||||
for ( final String s : strings ) {
|
||||
assert s.indexOf(",") == -1; // no commas in individual strings
|
||||
b.append(",").append(s);
|
||||
if ( s != null ) {
|
||||
assert s.indexOf(",") == -1; // no commas in individual strings
|
||||
b.append(",").append(s);
|
||||
}
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
|
@ -299,20 +291,40 @@ public final class BCF2Utils {
|
|||
else return Collections.singletonList(o);
|
||||
}
|
||||
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
return (byte)(readByte(stream));
|
||||
} case 2: {
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
return (short)((b1 << 8) | b2);
|
||||
} case 4: {
|
||||
final int b4 = readByte(stream) & 0xFF;
|
||||
final int b3 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
|
||||
} default: throw new ReviewedStingException("Unexpected size during decoding");
|
||||
}
|
||||
}
|
||||
|
||||
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
switch ( type.getSizeInBytes() ) {
|
||||
case 1:
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 2:
|
||||
encodeStream.write((0x00FF & value));
|
||||
encodeStream.write((0xFF00 & value) >> 8);
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 4:
|
||||
encodeStream.write((0xFF000000 & value) >> 24);
|
||||
encodeStream.write((0x00FF0000 & value) >> 16);
|
||||
encodeStream.write((0x0000FF00 & value) >> 8);
|
||||
encodeStream.write((0x000000FF & value));
|
||||
encodeStream.write((0x0000FF00 & value) >> 8);
|
||||
encodeStream.write((0x00FF0000 & value) >> 16);
|
||||
encodeStream.write((0xFF000000 & value) >> 24);
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: unexpected type size " + type);
|
||||
|
|
|
|||
|
|
@ -22,8 +22,9 @@ import java.util.zip.GZIPInputStream;
|
|||
|
||||
public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
|
||||
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
|
||||
protected static boolean doOnTheFlyModifications = true;
|
||||
|
||||
protected final static Logger log = Logger.getLogger(VCFCodec.class);
|
||||
protected final static Logger log = Logger.getLogger(AbstractVCFCodec.class);
|
||||
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
|
||||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
|
|
@ -58,6 +59,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
|
||||
protected Map<String, String> stringCache = new HashMap<String, String>();
|
||||
|
||||
protected boolean warnedAboutNoEqualsForNonFlag = false;
|
||||
|
||||
protected AbstractVCFCodec() {
|
||||
super(VariantContext.class);
|
||||
}
|
||||
|
|
@ -168,6 +171,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
}
|
||||
|
||||
this.header = new VCFHeader(metaData, sampleNames);
|
||||
if ( doOnTheFlyModifications )
|
||||
this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
|
||||
return this.header;
|
||||
}
|
||||
|
||||
|
|
@ -344,7 +349,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
// do we have genotyping data
|
||||
if (parts.length > NUM_STANDARD_FIELDS) {
|
||||
final LazyGenotypesContext.LazyParser lazyParser = new LazyVCFGenotypesParser(alleles, chr, pos);
|
||||
final int nGenotypes = header.getGenotypeSamples().size();
|
||||
final int nGenotypes = header.getNGenotypeSamples();
|
||||
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, parts[8], nGenotypes);
|
||||
|
||||
// did we resort the sample names? If so, we need to load the genotype data
|
||||
|
|
@ -425,6 +430,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
|
||||
if ( infoValueSplitSize == 1 ) {
|
||||
value = infoValueArray[0];
|
||||
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
|
||||
if ( headerLine != null && headerLine.getType() == VCFHeaderLineType.Flag && value.equals("0") ) {
|
||||
// deal with the case where a flag field has =0, such as DB=0, by skipping the add
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
ArrayList<String> valueList = new ArrayList<String>(infoValueSplitSize);
|
||||
for ( int j = 0; j < infoValueSplitSize; j++ )
|
||||
|
|
@ -433,7 +443,18 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
}
|
||||
} else {
|
||||
key = infoFieldArray[i];
|
||||
value = true;
|
||||
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
|
||||
if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) {
|
||||
// warn only the first time this codec instance sees a non-FLAG key written without "=value"
if ( ! warnedAboutNoEqualsForNonFlag ) {
    log.warn("Found info key " + key + " without a = value, but the header says the field is of type "
            + headerLine.getType() + " but this construct is only valid for FLAG type fields");
    warnedAboutNoEqualsForNonFlag = true;
}
|
||||
|
||||
value = VCFConstants.MISSING_VALUE_v4;
|
||||
} else {
|
||||
value = true;
|
||||
}
|
||||
}
|
||||
|
||||
attributes.put(key, value);
|
||||
|
|
@ -780,7 +801,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i])));
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
gb.AD(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_PL_KEY)) {
|
||||
gb.PL(decodeInts(GTValueArray[i]));
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
gb.PL(GenotypeLikelihoods.fromGLField(GTValueArray[i]).getAsPLs());
|
||||
|
|
@ -823,4 +844,13 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]);
|
||||
return values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Forces all VCFCodecs to not perform any on the fly modifications to the VCF header
|
||||
* of VCF records. Useful primarily for raw comparisons such as when comparing
|
||||
* raw VCF records
|
||||
*/
|
||||
public static final void disableOnTheFlyModifications() {
|
||||
doOnTheFlyModifications = false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -114,7 +114,8 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
* @param headerForRepairs
|
||||
*/
|
||||
public void setHeaderForRepairs(final VCFHeader headerForRepairs) {
|
||||
log.info("Using master VCF header to repair missing files from incoming VCFs");
|
||||
if ( headerForRepairs != null )
|
||||
log.info("Using master VCF header to repair missing files from incoming VCFs");
|
||||
this.headerForRepairs = headerForRepairs;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,8 +24,10 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
|
|
@ -35,6 +37,8 @@ import java.util.Map;
|
|||
* a base class for compound header lines, which include info lines and format lines (so far)
|
||||
*/
|
||||
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
|
||||
// log under this class's own category rather than VCFHeader's
final protected static Logger logger = Logger.getLogger(VCFCompoundHeaderLine.class);
|
||||
|
||||
public enum SupportedHeaderLineType {
|
||||
INFO(true), FORMAT(false);
|
||||
|
||||
|
|
@ -151,6 +155,10 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
count = Integer.valueOf(numberStr);
|
||||
|
||||
}
|
||||
|
||||
if ( count < 0 && countType == VCFHeaderLineCount.INTEGER )
|
||||
throw new UserException.MalformedVCFHeader("Count < 0 for fixed size VCF header field " + name);
|
||||
|
||||
try {
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
} catch (Exception e) {
|
||||
|
|
@ -172,6 +180,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
if ( name == null || type == null || description == null || lineType == null )
|
||||
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s type=%s desc=%s lineType=%s",
|
||||
super.getKey(), name, type, description, lineType ));
|
||||
|
||||
if ( type == VCFHeaderLineType.Flag && count != 0 ) {
    // log before resetting, so the message reports the offending count instead of the repaired 0
    logger.warn("FLAG fields must have a count value of 0, but saw " + count + " for header line " + getID() + ". Changing it to 0 inside the code");
    count = 0;
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -44,20 +44,21 @@ public final class VCFConstants {
|
|||
public static final String DOWNSAMPLED_KEY = "DS";
|
||||
public static final String EXPECTED_ALLELE_COUNT_KEY = "EC";
|
||||
public static final String END_KEY = "END";
|
||||
|
||||
public static final String GENOTYPE_FILTER_KEY = "FT";
|
||||
public static final String GENOTYPE_KEY = "GT";
|
||||
@Deprecated
|
||||
public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; // log10 scaled genotype likelihoods
|
||||
public static final String GENOTYPE_POSTERIORS_KEY = "GP";
|
||||
public static final String GENOTYPE_QUALITY_KEY = "GQ";
|
||||
public static final String GENOTYPE_ALLELE_DEPTHS = "AD";
|
||||
public static final String GENOTYPE_PL_KEY = "PL"; // phred-scaled genotype likelihoods
|
||||
@Deprecated public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; // log10 scaled genotype likelihoods
|
||||
|
||||
public static final String HAPMAP2_KEY = "H2";
|
||||
public static final String HAPMAP3_KEY = "H3";
|
||||
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
|
||||
public static final String RMS_MAPPING_QUALITY_KEY = "MQ";
|
||||
public static final String MAPPING_QUALITY_ZERO_KEY = "MQ0";
|
||||
public static final String SAMPLE_NUMBER_KEY = "NS";
|
||||
public static final String PHRED_GENOTYPE_LIKELIHOODS_KEY = "PL"; // phred-scaled genotype likelihoods
|
||||
public static final String PHASE_QUALITY_KEY = "PQ";
|
||||
public static final String PHASE_SET_KEY = "PS";
|
||||
public static final String OLD_DEPTH_KEY = "RD";
|
||||
|
|
@ -88,7 +89,8 @@ public final class VCFConstants {
|
|||
public static final String FORMAT_HEADER_START = "##FORMAT";
|
||||
public static final String INFO_HEADER_START = "##INFO";
|
||||
public static final String ALT_HEADER_START = "##ALT";
|
||||
public static final String CONTIG_HEADER_START = "##contig";
|
||||
public static final String CONTIG_HEADER_KEY = "contig";
|
||||
public static final String CONTIG_HEADER_START = "##" + CONTIG_HEADER_KEY;
|
||||
|
||||
// old indel alleles
|
||||
public static final char DELETION_ALLELE_v3 = 'D';
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.codecs.vcf;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -54,11 +55,12 @@ public class VCFHeader {
|
|||
private final Set<VCFHeaderLine> mMetaData = new TreeSet<VCFHeaderLine>();
|
||||
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
|
||||
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
|
||||
private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new HashMap<String, VCFFilterHeaderLine>();
|
||||
private final Map<String, VCFHeaderLine> mOtherMetaData = new HashMap<String, VCFHeaderLine>();
|
||||
private final List<VCFContigHeaderLine> contigMetaData = new ArrayList<VCFContigHeaderLine>();
|
||||
|
||||
// the list of auxiliary tags
|
||||
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
|
||||
private final List<String> mGenotypeSampleNames = new ArrayList<String>();
|
||||
|
||||
// the character string that indicates meta data
|
||||
public static final String METADATA_INDICATOR = "##";
|
||||
|
|
@ -106,7 +108,15 @@ public class VCFHeader {
|
|||
* @param genotypeSampleNames the sample names
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
||||
this(metaData, new ArrayList<String>(genotypeSampleNames));
|
||||
}
|
||||
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData, List<String> genotypeSampleNames) {
|
||||
this(metaData);
|
||||
|
||||
if ( genotypeSampleNames.size() != new HashSet<String>(genotypeSampleNames).size() )
|
||||
throw new ReviewedStingException("BUG: VCF header has duplicate sample names");
|
||||
|
||||
mGenotypeSampleNames.addAll(genotypeSampleNames);
|
||||
samplesWereAlreadySorted = ParsingUtils.isSorted(genotypeSampleNames);
|
||||
buildVCFReaderMaps(genotypeSampleNames);
|
||||
|
|
@ -175,12 +185,23 @@ public class VCFHeader {
|
|||
} else if ( line instanceof VCFFormatHeaderLine ) {
|
||||
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
|
||||
addMetaDataMapBinding(mFormatMetaData, formatLine);
|
||||
} else if ( line instanceof VCFFilterHeaderLine ) {
|
||||
VCFFilterHeaderLine filterLine = (VCFFilterHeaderLine)line;
|
||||
mFilterMetaData.put(filterLine.getID(), filterLine);
|
||||
} else if ( line instanceof VCFContigHeaderLine ) {
|
||||
contigMetaData.add((VCFContigHeaderLine)line);
|
||||
} else {
|
||||
mOtherMetaData.put(line.getKey(), line);
|
||||
}
|
||||
}
|
||||
|
||||
if ( hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && ! hasFormatLine(VCFConstants.GENOTYPE_PL_KEY) ) {
|
||||
logger.warn("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
|
||||
+ VCFConstants.GENOTYPE_PL_KEY + " field. As the GATK now only manages PL fields internally"
|
||||
+ " automatically adding a corresponding PL field to your VCF header");
|
||||
addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
loadMetaDataMaps();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -239,7 +260,7 @@ public class VCFHeader {
|
|||
*
|
||||
* @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
|
||||
*/
|
||||
public Set<String> getGenotypeSamples() {
|
||||
public List<String> getGenotypeSamples() {
|
||||
return mGenotypeSampleNames;
|
||||
}
|
||||
|
||||
|
|
@ -294,6 +315,26 @@ public class VCFHeader {
|
|||
return mFormatMetaData.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id the header key name
|
||||
* @return the meta data line, or null if there is none
|
||||
*/
|
||||
public VCFFilterHeaderLine getFilterHeaderLine(final String id) {
|
||||
return mFilterMetaData.get(id);
|
||||
}
|
||||
|
||||
/**
 * @param id the INFO header key name
 * @return true if getInfoHeaderLine(id) finds a line for this id, false otherwise
 */
public boolean hasInfoLine(final String id) {
|
||||
return getInfoHeaderLine(id) != null;
|
||||
}
|
||||
|
||||
/**
 * @param id the FORMAT header key name
 * @return true if getFormatHeaderLine(id) finds a line for this id, false otherwise
 */
public boolean hasFormatLine(final String id) {
|
||||
return getFormatHeaderLine(id) != null;
|
||||
}
|
||||
|
||||
/**
 * @param id the FILTER header key name
 * @return true if getFilterHeaderLine(id) finds a line for this id, false otherwise
 */
public boolean hasFilterLine(final String id) {
|
||||
return getFilterHeaderLine(id) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the header key name
|
||||
* @return the meta data line, or null if there is none
|
||||
|
|
|
|||
|
|
@ -0,0 +1,262 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Manages header lines for standard VCF INFO and FORMAT fields
|
||||
*
|
||||
* Provides simple mechanisms for registering standard lines,
|
||||
* looking them up, and adding them to headers
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
*/
|
||||
public class VCFStandardHeaderLines {
|
||||
/**
|
||||
* Enabling this causes us to repair header lines even if only their descriptions differ
|
||||
*/
|
||||
private final static boolean REPAIR_BAD_DESCRIPTIONS = false;
|
||||
protected final static Logger logger = Logger.getLogger(VCFStandardHeaderLines.class);
|
||||
private static Standards<VCFFormatHeaderLine> formatStandards = new Standards<VCFFormatHeaderLine>();
|
||||
private static Standards<VCFInfoHeaderLine> infoStandards = new Standards<VCFInfoHeaderLine>();
|
||||
|
||||
/**
|
||||
* Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly
|
||||
* allocated VCFHeader with standard VCF header lines repaired as necessary
|
||||
*
|
||||
* @param header
|
||||
* @return
|
||||
*/
|
||||
@Requires("header != null")
|
||||
@Ensures("result != null")
|
||||
public static VCFHeader repairStandardHeaderLines(final VCFHeader header) {
|
||||
final Set<VCFHeaderLine> newLines = new LinkedHashSet<VCFHeaderLine>(header.getMetaData().size());
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFFormatHeaderLine ) {
|
||||
line = formatStandards.repair((VCFFormatHeaderLine) line);
|
||||
} else if ( line instanceof VCFInfoHeaderLine) {
|
||||
line = infoStandards.repair((VCFInfoHeaderLine) line);
|
||||
}
|
||||
|
||||
newLines.add(line);
|
||||
}
|
||||
|
||||
return new VCFHeader(newLines, header.getGenotypeSamples());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds header lines for each of the format fields in IDs to header, returning the set of
|
||||
* IDs without standard descriptions, unless throwErrorForMissing is true, in which
|
||||
* case this situation results in a ReviewedStingException
|
||||
*
|
||||
* @param IDs
|
||||
* @return
|
||||
*/
|
||||
public static Set<String> addStandardFormatLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final Collection<String> IDs) {
|
||||
return formatStandards.addToHeader(headerLines, IDs, throwErrorForMissing);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection)
|
||||
*
|
||||
* @param headerLines
|
||||
* @param throwErrorForMissing
|
||||
* @param IDs
|
||||
* @return
|
||||
*/
|
||||
public static Set<String> addStandardFormatLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final String ... IDs) {
|
||||
return addStandardFormatLines(headerLines, throwErrorForMissing, Arrays.asList(IDs));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the standard format line for ID. If none exists, return null or throw an exception, depending
|
||||
* on throwErrorForMissing
|
||||
*
|
||||
* @param ID
|
||||
* @param throwErrorForMissing
|
||||
* @return
|
||||
*/
|
||||
public static VCFFormatHeaderLine getFormatLine(final String ID, final boolean throwErrorForMissing) {
|
||||
return formatStandards.get(ID, throwErrorForMissing);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the standard format line for ID. If none exists throw an exception
|
||||
*
|
||||
* @param ID
|
||||
* @return
|
||||
*/
|
||||
public static VCFFormatHeaderLine getFormatLine(final String ID) {
|
||||
return formatStandards.get(ID, true);
|
||||
}
|
||||
|
||||
private static void registerStandard(final VCFFormatHeaderLine line) {
|
||||
formatStandards.add(line);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds header lines for each of the info fields in IDs to header, returning the set of
|
||||
* IDs without standard descriptions, unless throwErrorForMissing is true, in which
|
||||
* case this situation results in a ReviewedStingException
|
||||
*
|
||||
* @param IDs
|
||||
* @return
|
||||
*/
|
||||
public static Set<String> addStandardInfoLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final Collection<String> IDs) {
|
||||
return infoStandards.addToHeader(headerLines, IDs, throwErrorForMissing);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #addStandardFormatLines(java.util.Set, boolean, java.util.Collection)
|
||||
*
|
||||
* @param IDs
|
||||
* @return
|
||||
*/
|
||||
public static Set<String> addStandardInfoLines(final Set<VCFHeaderLine> headerLines, final boolean throwErrorForMissing, final String ... IDs) {
|
||||
return addStandardInfoLines(headerLines, throwErrorForMissing, Arrays.asList(IDs));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the standard info line for ID. If none exists, return null or throw an exception, depending
|
||||
* on throwErrorForMissing
|
||||
*
|
||||
* @param ID
|
||||
* @param throwErrorForMissing
|
||||
* @return
|
||||
*/
|
||||
public static VCFInfoHeaderLine getInfoLine(final String ID, final boolean throwErrorForMissing) {
|
||||
return infoStandards.get(ID, throwErrorForMissing);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the standard info line for ID. If none exists throw an exception
|
||||
*
|
||||
* @param ID
|
||||
* @return
|
||||
*/
|
||||
public static VCFInfoHeaderLine getInfoLine(final String ID) {
|
||||
return getInfoLine(ID, true);
|
||||
}
|
||||
|
||||
private static void registerStandard(final VCFInfoHeaderLine line) {
|
||||
infoStandards.add(line);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// VCF header line constants
|
||||
//
|
||||
static {
|
||||
// FORMAT lines
|
||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
|
||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
|
||||
|
||||
// INFO lines
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.DBSNP_KEY, 0, VCFHeaderLineType.Flag, "dbSNP Membership"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.MAPPING_QUALITY_ZERO_KEY, 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.RMS_MAPPING_QUALITY_KEY, 1, VCFHeaderLineType.Float, "RMS Mapping Quality"));
|
||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event"));
|
||||
}
|
||||
|
||||
private static class Standards<T extends VCFCompoundHeaderLine> {
|
||||
private final Map<String, T> standards = new HashMap<String, T>();
|
||||
|
||||
@Requires("line != null")
|
||||
@Ensures({"result != null", "result.getID().equals(line.getID())"})
|
||||
public T repair(final T line) {
|
||||
final T standard = get(line.getID(), false);
|
||||
if ( standard != null ) {
|
||||
final boolean badCountType = line.getCountType() != standard.getCountType();
|
||||
final boolean badCount = line.isFixedCount() && ! badCountType && line.getCount() != standard.getCount();
|
||||
final boolean badType = line.getType() != standard.getType();
|
||||
final boolean badDesc = ! line.getDescription().equals(standard.getDescription());
|
||||
final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc);
|
||||
|
||||
if ( needsRepair ) {
|
||||
logger.warn("Repairing standard header line for field " + line.getID() + " because"
|
||||
+ (badCountType ? " -- count types disagree; header has " + line.getCountType() + " but standard is " + standard.getCountType() : "")
|
||||
+ (badType ? " -- type disagree; header has " + line.getType() + " but standard is " + standard.getType() : "")
|
||||
+ (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "")
|
||||
+ (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
|
||||
return standard;
|
||||
} else
|
||||
return line;
|
||||
} else
|
||||
return line;
|
||||
}
|
||||
|
||||
@Requires("headerLines != null")
|
||||
@Ensures({"result != null", "result.isEmpty() || ! throwErrorForMissing", "IDs.containsAll(result)"})
|
||||
public Set<String> addToHeader(final Set<VCFHeaderLine> headerLines, final Collection<String> IDs, final boolean throwErrorForMissing) {
|
||||
final Set<String> missing = new HashSet<String>();
|
||||
for ( final String ID : IDs ) {
|
||||
final T line = get(ID, throwErrorForMissing);
|
||||
if ( line == null )
|
||||
missing.add(ID);
|
||||
else
|
||||
headerLines.add(line);
|
||||
}
|
||||
|
||||
return missing;
|
||||
}
|
||||
|
||||
@Requires("line != null")
|
||||
@Ensures({"standards.containsKey(line.getID())"})
|
||||
public void add(final T line) {
|
||||
if ( standards.containsKey(line.getID()) )
|
||||
throw new ReviewedStingException("Attempting to add multiple standard header lines for ID " + line.getID());
|
||||
standards.put(line.getID(), line);
|
||||
}
|
||||
|
||||
@Requires("ID != null")
|
||||
@Ensures({"result != null || ! throwErrorForMissing"})
|
||||
public T get(final String ID, final boolean throwErrorForMissing) {
|
||||
final T x = standards.get(ID);
|
||||
if ( throwErrorForMissing && x == null )
|
||||
throw new ReviewedStingException("Couldn't find a standard VCF header line for field " + ID);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -247,9 +247,13 @@ public class VCFUtils {
|
|||
* @param refDict the SAM formatted reference sequence dictionary
|
||||
*/
|
||||
public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
|
||||
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldHeader.getMetaData().size());
|
||||
return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaData(), referenceFile, refDict), oldHeader.getGenotypeSamples());
|
||||
}
|
||||
|
||||
for ( final VCFHeaderLine line : oldHeader.getMetaData() ) {
|
||||
public final static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {
|
||||
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldLines.size());
|
||||
|
||||
for ( final VCFHeaderLine line : oldLines ) {
|
||||
if ( line instanceof VCFContigHeaderLine )
|
||||
continue; // skip old contig lines
|
||||
if ( line.getKey().equals(VCFHeader.REFERENCE_KEY) )
|
||||
|
|
@ -261,7 +265,7 @@ public class VCFUtils {
|
|||
lines.add(contigLine);
|
||||
|
||||
lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + referenceFile.getAbsolutePath()));
|
||||
return new VCFHeader(lines, oldHeader.getGenotypeSamples());
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -194,12 +194,35 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return whether or not this read has base insertion or deletion qualities (one of the two is sufficient to return true)
|
||||
*/
|
||||
public boolean hasBaseIndelQualities() {
|
||||
return getAttribute( BQSR_BASE_INSERTION_QUALITIES ) != null || getAttribute( BQSR_BASE_DELETION_QUALITIES ) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the base insertion quality or null if read doesn't have one
|
||||
*/
|
||||
public byte[] getExistingBaseInsertionQualities() {
|
||||
return SAMUtils.fastqToPhred( getStringAttribute(BQSR_BASE_INSERTION_QUALITIES));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the base deletion quality or null if read doesn't have one
|
||||
*/
|
||||
public byte[] getExistingBaseDeletionQualities() {
|
||||
return SAMUtils.fastqToPhred( getStringAttribute(BQSR_BASE_DELETION_QUALITIES));
|
||||
}
|
||||
|
||||
/**
|
||||
* Default utility to query the base insertion quality of a read. If the read doesn't have one, it creates an array of default qualities (currently Q45)
|
||||
* and assigns it to the read.
|
||||
*
|
||||
* @return the base insertion quality array
|
||||
*/
|
||||
public byte[] getBaseInsertionQualities() {
|
||||
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_INSERTION_QUALITIES ) );
|
||||
byte [] quals = getExistingBaseInsertionQualities();
|
||||
if( quals == null ) {
|
||||
quals = new byte[getBaseQualities().length];
|
||||
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||
|
|
@ -209,8 +232,14 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
return quals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default utility to query the base deletion quality of a read. If the read doesn't have one, it creates an array of default qualities (currently Q45)
|
||||
* and assigns it to the read.
|
||||
*
|
||||
* @return the base deletion quality array
|
||||
*/
|
||||
public byte[] getBaseDeletionQualities() {
|
||||
byte[] quals = SAMUtils.fastqToPhred( getStringAttribute( BQSR_BASE_DELETION_QUALITIES ) );
|
||||
byte[] quals = getExistingBaseDeletionQualities();
|
||||
if( quals == null ) {
|
||||
quals = new byte[getBaseQualities().length];
|
||||
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||
|
|
@ -478,6 +507,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
public Object clone() throws CloneNotSupportedException {
|
||||
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
|
||||
if (temporaryAttributes != null) {
|
||||
clone.temporaryAttributes = new HashMap<Object, Object>();
|
||||
for (Object attribute : temporaryAttributes.keySet())
|
||||
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -347,6 +347,15 @@ public class Allele implements Comparable<Allele> {
|
|||
*/
|
||||
public String getDisplayString() { return new String(bases); }
|
||||
|
||||
/**
|
||||
* Same as #getDisplayString() but returns the result as byte[].
|
||||
*
|
||||
* Slightly faster than getDisplayString()
|
||||
*
|
||||
* @return the allele string representation
|
||||
*/
|
||||
public byte[] getDisplayBases() { return bases; }
|
||||
|
||||
/**
|
||||
* @param other the other allele
|
||||
*
|
||||
|
|
|
|||
|
|
@ -156,11 +156,6 @@ public final class FastGenotype extends Genotype {
|
|||
return (List<String>) getExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filtersWereApplied() {
|
||||
return hasExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
}
|
||||
|
||||
@Override public int[] getPL() {
|
||||
return PL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
VCFConstants.GENOTYPE_QUALITY_KEY,
|
||||
VCFConstants.DEPTH_KEY,
|
||||
VCFConstants.GENOTYPE_ALLELE_DEPTHS,
|
||||
VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
VCFConstants.GENOTYPE_PL_KEY);
|
||||
|
||||
public final static String PHASED_ALLELE_SEPARATOR = "|";
|
||||
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
|
||||
|
|
@ -354,7 +354,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
|
||||
toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
|
||||
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
|
||||
toStringIfExists(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, getPL()),
|
||||
toStringIfExists(VCFConstants.GENOTYPE_PL_KEY, getPL()),
|
||||
sortedString(getExtendedAttributes()));
|
||||
}
|
||||
|
||||
|
|
@ -451,7 +451,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"})
|
||||
@Ensures({"result != null"})
|
||||
public abstract List<String> getFilters();
|
||||
|
||||
@Ensures({"result != getFilters().isEmpty()"})
|
||||
|
|
@ -459,9 +459,6 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
return ! getFilters().isEmpty();
|
||||
}
|
||||
|
||||
@Ensures("result == true || getFilters().isEmpty()")
|
||||
public abstract boolean filtersWereApplied();
|
||||
|
||||
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
|
||||
@Deprecated public double getLog10PError() { return getGQ() / -10.0; }
|
||||
@Deprecated public int getPhredScaledQual() { return getGQ(); }
|
||||
|
|
@ -505,7 +502,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
return getGQ();
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
return Arrays.asList(getAD());
|
||||
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
|
||||
return Arrays.asList(getPL());
|
||||
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
|
||||
return getDP();
|
||||
|
|
@ -521,7 +518,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
return hasGQ();
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
|
||||
return hasAD();
|
||||
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
} else if (key.equals(VCFConstants.GENOTYPE_PL_KEY)) {
|
||||
return hasPL();
|
||||
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
|
||||
return hasDP();
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
||||
|
|
@ -49,6 +50,7 @@ import java.util.*;
|
|||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
@Invariant({"alleles != null"})
|
||||
public final class GenotypeBuilder {
|
||||
public static boolean MAKE_FAST_BY_DEFAULT = true;
|
||||
|
||||
|
|
@ -154,9 +156,9 @@ public final class GenotypeBuilder {
|
|||
* function you must provide sampleName and alleles before trying to
|
||||
* make more Genotypes.
|
||||
*/
|
||||
public final void reset() {
|
||||
sampleName = null;
|
||||
alleles = null;
|
||||
public final void reset(final boolean keepSampleName) {
|
||||
if ( ! keepSampleName ) sampleName = null;
|
||||
alleles = Collections.emptyList();
|
||||
isPhased = false;
|
||||
GQ = -1;
|
||||
DP = -1;
|
||||
|
|
@ -381,7 +383,8 @@ public final class GenotypeBuilder {
|
|||
*/
|
||||
@Requires("filters != null")
|
||||
public GenotypeBuilder filters(final List<String> filters) {
|
||||
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
|
||||
if ( ! filters.isEmpty() )
|
||||
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ public class SlowGenotype extends Genotype {
|
|||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
|
||||
if ( log10Likelihoods != null )
|
||||
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||
commonInfo.putAttribute(VCFConstants.GENOTYPE_PL_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||
this.isPhased = isPhased;
|
||||
validate();
|
||||
}
|
||||
|
|
@ -72,12 +72,12 @@ public class SlowGenotype extends Genotype {
|
|||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||
//
|
||||
@Override public boolean hasLikelihoods() {
|
||||
return (commonInfo.hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
|
||||
return (commonInfo.hasAttribute(VCFConstants.GENOTYPE_PL_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_PL_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
|
||||
(commonInfo.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
|
||||
}
|
||||
|
||||
@Override public GenotypeLikelihoods getLikelihoods() {
|
||||
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
|
||||
GenotypeLikelihoods x = getLikelihoods(VCFConstants.GENOTYPE_PL_KEY, true);
|
||||
if ( x != null )
|
||||
return x;
|
||||
else {
|
||||
|
|
@ -113,7 +113,6 @@ public class SlowGenotype extends Genotype {
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Override public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
|
||||
@Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
|
||||
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
|
||||
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }
|
||||
|
||||
|
|
|
|||
|
|
@ -339,7 +339,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) ) {
|
||||
return this; // fast path when you don't have any work to do
|
||||
} else {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
|
|
@ -559,7 +559,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
|
||||
public String getAlleleStringWithRefPadding(final Allele allele) {
|
||||
if ( VariantContextUtils.needsPadding(this) )
|
||||
return VariantContextUtils.padAllele(this, allele);
|
||||
return VariantContextUtils.padAllele(this, allele).getDisplayString();
|
||||
else
|
||||
return allele.getDisplayString();
|
||||
}
|
||||
|
|
@ -1177,8 +1177,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
// if ( getType() == Type.INDEL ) {
|
||||
// if ( getReference().length() != (getLocation().size()-1) ) {
|
||||
long length = (stop - start) + 1;
|
||||
if ( (getReference().isNull() && length != 1 ) ||
|
||||
(!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) {
|
||||
if ( ! isSymbolic()
|
||||
&& ((getReference().isNull() && length != 1 )
|
||||
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
|
||||
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
||||
}
|
||||
}
|
||||
|
|
@ -1358,19 +1359,37 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header) {
|
||||
builder.attributes(fullyDecodeAttributes(getAttributes(), header));
|
||||
builder.attributes(fullyDecodeAttributes(getAttributes(), header, false));
|
||||
}
|
||||
|
||||
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes, final VCFHeader header) {
|
||||
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes,
|
||||
final VCFHeader header,
|
||||
final boolean allowMissingValuesComparedToHeader) {
|
||||
final Map<String, Object> newAttributes = new HashMap<String, Object>(attributes.size());
|
||||
|
||||
for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) {
|
||||
final String field = attr.getKey();
|
||||
|
||||
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) )
|
||||
continue; // gross, FT is part of the extended attributes
|
||||
|
||||
final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field);
|
||||
final Object decoded = decodeValue(field, attr.getValue(), format);
|
||||
|
||||
if ( decoded != null )
|
||||
newAttributes.put(field, decoded);
|
||||
if ( decoded != null &&
|
||||
! allowMissingValuesComparedToHeader
|
||||
&& format.getCountType() != VCFHeaderLineCount.UNBOUNDED
|
||||
&& format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements
|
||||
final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1;
|
||||
final int expSize = format.getCount(this.getNAlleles() - 1);
|
||||
if ( obsSize != expSize ) {
|
||||
throw new UserException.MalformedVCFHeader("Discordant field size detected for field " +
|
||||
field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " +
|
||||
"but the header says this should have " + expSize + " values based on header record " +
|
||||
format);
|
||||
}
|
||||
}
|
||||
newAttributes.put(field, decoded);
|
||||
}
|
||||
|
||||
return newAttributes;
|
||||
|
|
@ -1378,7 +1397,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
|
||||
private final Object decodeValue(final String field, final Object value, final VCFCompoundHeaderLine format) {
|
||||
if ( value instanceof String ) {
|
||||
if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) )
|
||||
if ( field.equals(VCFConstants.GENOTYPE_PL_KEY) )
|
||||
return GenotypeLikelihoods.fromPLField((String)value);
|
||||
|
||||
final String string = (String)value;
|
||||
|
|
@ -1400,6 +1419,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
} else {
|
||||
return value;
|
||||
}
|
||||
|
||||
// allowMissingValuesComparedToHeader
|
||||
}
|
||||
|
||||
private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
|
||||
|
|
@ -1409,7 +1430,12 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
else {
|
||||
switch ( format.getType() ) {
|
||||
case Character: return string;
|
||||
case Flag: return Boolean.valueOf(string);
|
||||
case Flag:
|
||||
final boolean b = Boolean.valueOf(string);
|
||||
if ( b == false )
|
||||
throw new UserException.MalformedVCF("VariantContext FLAG fields " + field + " cannot contain false values"
|
||||
+ " as seen at " + getChr() + ":" + getStart());
|
||||
return b;
|
||||
case String: return string;
|
||||
case Integer: return Integer.valueOf(string);
|
||||
case Float: return Double.valueOf(string);
|
||||
|
|
@ -1430,7 +1456,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) {
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header);
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header, true);
|
||||
return new GenotypeBuilder(g).attributes(map).make();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ public class VariantContextUtils {
|
|||
static {
|
||||
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
|
||||
engine.setLenient(false);
|
||||
engine.setDebug(false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -119,10 +120,6 @@ public class VariantContextUtils {
|
|||
attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
|
||||
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
|
||||
}
|
||||
else {
|
||||
attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
return attributes;
|
||||
|
|
@ -186,17 +183,23 @@ public class VariantContextUtils {
|
|||
return false;
|
||||
}
|
||||
|
||||
public static String padAllele(final VariantContext vc, final Allele allele) {
|
||||
public static Allele padAllele(final VariantContext vc, final Allele allele) {
|
||||
assert needsPadding(vc);
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||
sb.append(allele.getDisplayString());
|
||||
return sb.toString();
|
||||
if ( allele.isSymbolic() )
|
||||
return allele;
|
||||
else {
|
||||
// prepend the reference base to the current allele's bases and create a new, padded allele
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||
sb.append(allele.getDisplayString());
|
||||
final String newBases = sb.toString();
|
||||
return Allele.create(newBases, allele.isReference());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
|
||||
final boolean padVC = needsPadding(inputVC);
|
||||
|
||||
// nothing to do if we don't need to pad bases
|
||||
|
|
@ -204,46 +207,21 @@ public class VariantContextUtils {
|
|||
if ( !inputVC.hasReferenceBaseForIndel() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
||||
|
||||
Byte refByte = inputVC.getReferenceBaseForIndel();
|
||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
|
||||
final Map<Allele, Allele> unpaddedToPadded = new HashMap<Allele, Allele>(inputVC.getNAlleles());
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
|
||||
for (Allele a : inputVC.getAlleles()) {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
} else {
|
||||
String newBases;
|
||||
if ( refBaseShouldBeAppliedToEndOfAlleles )
|
||||
newBases = a.getBaseString() + new String(new byte[]{refByte});
|
||||
else
|
||||
newBases = new String(new byte[]{refByte}) + a.getBaseString();
|
||||
alleles.add(Allele.create(newBases,a.isReference()));
|
||||
}
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
final Allele padded = padAllele(inputVC, a);
|
||||
alleles.add(padded);
|
||||
unpaddedToPadded.put(a, padded);
|
||||
}
|
||||
|
||||
// now we can recreate new genotypes with padded alleles
|
||||
GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
|
||||
for (final Genotype g : inputVC.getGenotypes() ) {
|
||||
List<Allele> inAlleles = g.getAlleles();
|
||||
List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
||||
for (Allele a : inAlleles) {
|
||||
if (a.isCalled()) {
|
||||
if (a.isSymbolic()) {
|
||||
newGenotypeAlleles.add(a);
|
||||
} else {
|
||||
String newBases;
|
||||
if ( refBaseShouldBeAppliedToEndOfAlleles )
|
||||
newBases = a.getBaseString() + new String(new byte[]{refByte});
|
||||
else
|
||||
newBases = new String(new byte[]{refByte}) + a.getBaseString();
|
||||
newGenotypeAlleles.add(Allele.create(newBases,a.isReference()));
|
||||
}
|
||||
}
|
||||
else {
|
||||
// add no-call allele
|
||||
newGenotypeAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
||||
for (final Allele a : g.getAlleles()) {
|
||||
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
||||
|
||||
|
|
@ -560,7 +538,7 @@ public class VariantContextUtils {
|
|||
for (final VariantContext vc : prepaddedVCs) {
|
||||
// also a reasonable place to remove filtered calls, if needed
|
||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(createVariantContextWithPaddedAlleles(vc, false));
|
||||
VCs.add(createVariantContextWithPaddedAlleles(vc));
|
||||
}
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -264,7 +264,9 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
// if the context is null, we need to create it to evaluate the JEXL expression
|
||||
if (this.jContext == null) createContext();
|
||||
try {
|
||||
jexl.put (exp, (Boolean) exp.exp.evaluate(jContext));
|
||||
final Boolean value = (Boolean) exp.exp.evaluate(jContext);
|
||||
// treat errors as no match
|
||||
jexl.put(exp, value == null ? false : value);
|
||||
} catch (Exception e) {
|
||||
throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -260,7 +260,7 @@ public abstract class BCF2FieldEncoder {
|
|||
@Requires("isDynamicallyTyped()")
|
||||
@Ensures("result != null")
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field");
|
||||
throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field " + getField());
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
|
@ -269,21 +269,6 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Convenience method that just called encodeValue with a no minimum for the number of values.
|
||||
*
|
||||
* Primarily useful for encoding site values
|
||||
*
|
||||
* @param encoder
|
||||
* @param value
|
||||
* @param type
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
|
||||
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encodeValue(encoder, value, type, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Key abstract method that should encode a value of the given type into the encoder.
|
||||
*
|
||||
|
|
@ -348,10 +333,10 @@ public abstract class BCF2FieldEncoder {
|
|||
if ( value == null )
|
||||
return "";
|
||||
else if (value instanceof List) {
|
||||
if ( ((List) value).size() == 1 )
|
||||
return (String)((List) value).get(0);
|
||||
else
|
||||
return BCF2Utils.collapseStringList((List<String>)value);
|
||||
final List<String> l = (List<String>)value;
|
||||
if ( l.isEmpty() ) return "";
|
||||
else if ( l.size() == 1 ) return (String)l.get(0);
|
||||
else return BCF2Utils.collapseStringList(l);
|
||||
} else
|
||||
return (String)value;
|
||||
}
|
||||
|
|
@ -367,7 +352,7 @@ public abstract class BCF2FieldEncoder {
|
|||
public Flag(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.INT8);
|
||||
if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
|
||||
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
|
||||
throw new ReviewedStingException("Flag encoder only suppports atomic flags for field " + getField());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -376,7 +361,7 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
|
||||
@Override
|
||||
@Requires("minValues <= 1")
|
||||
@Requires({"minValues <= 1", "value != null", "value instanceof Boolean", "((Boolean)value) == true"})
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
encoder.encodeRawBytes(1, getStaticType());
|
||||
}
|
||||
|
|
@ -409,9 +394,11 @@ public abstract class BCF2FieldEncoder {
|
|||
} else {
|
||||
// handle generic case
|
||||
final List<Double> doubles = toList(Double.class, value);
|
||||
for ( final double d : doubles ) {
|
||||
encoder.encodeRawFloat(d);
|
||||
count++;
|
||||
for ( final Double d : doubles ) {
|
||||
if ( d != null ) { // necessary because .,. => [null, null] in VC
|
||||
encoder.encodeRawFloat(d);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
|
|
@ -439,6 +426,7 @@ public abstract class BCF2FieldEncoder {
|
|||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((int[])value);
|
||||
}
|
||||
|
||||
@Requires("value == null || ((int[])value).length <= minValues")
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
|
|
@ -495,9 +483,11 @@ public abstract class BCF2FieldEncoder {
|
|||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
for ( final int i : toList(Integer.class, value) ) {
|
||||
encoder.encodeRawInt(i, type);
|
||||
count++;
|
||||
for ( final Integer i : toList(Integer.class, value) ) {
|
||||
if ( i != null ) { // necessary because .,. => [null, null] in VC
|
||||
encoder.encodeRawInt(i, type);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ public abstract class BCF2FieldWriter {
|
|||
} else {
|
||||
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
|
||||
encoder.encodeType(valueCount, type);
|
||||
getFieldEncoder().encodeOneValue(encoder, rawValue, type);
|
||||
getFieldEncoder().encodeValue(encoder, rawValue, type, valueCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -179,7 +179,7 @@ public abstract class BCF2FieldWriter {
|
|||
final List<Integer> values = new ArrayList<Integer>(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
for ( final Object i : BCF2Utils.toList(g.getExtendedAttribute(getField(), null)) ) {
|
||||
values.add((Integer)i); // we know they are all integers
|
||||
if ( i != null ) values.add((Integer)i); // we know they are all integers
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -246,6 +246,10 @@ public abstract class BCF2FieldWriter {
|
|||
buildAlleleMap(vc);
|
||||
nValuesPerGenotype = vc.getMaxPloidy();
|
||||
|
||||
// deal with the case where we have no call everywhere, in which case we write out diploid
|
||||
if ( nValuesPerGenotype == -1 )
|
||||
nValuesPerGenotype = 2;
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
||||
|
|
@ -298,7 +302,6 @@ public abstract class BCF2FieldWriter {
|
|||
if ( nAlleles > 2 ) {
|
||||
// for multi-allelics we need to clear the map, and add additional lookups
|
||||
alleleMapForTriPlus.clear();
|
||||
alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
for ( int i = 2; i < alleles.size(); i++ ) {
|
||||
alleleMapForTriPlus.put(alleles.get(i), i);
|
||||
|
|
|
|||
|
|
@ -84,6 +84,8 @@ import java.util.*;
|
|||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
final private static List<Allele> MISSING_GENOTYPE = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
|
||||
|
||||
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
|
||||
private VCFHeader header;
|
||||
|
|
@ -111,8 +113,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
public void writeHeader(final VCFHeader header) {
|
||||
// create the contig offsets map
|
||||
if ( header.getContigLines().isEmpty() ) {
|
||||
logger.warn("No contig dictionary found in header, falling back to reference sequence dictionary");
|
||||
createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
|
||||
if ( ALLOW_MISSING_CONTIG_LINES ) {
|
||||
logger.warn("No contig dictionary found in header, falling back to reference sequence dictionary");
|
||||
createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
|
||||
} else {
|
||||
throw new UserException.MalformedBCF2("Cannot write BCF2 file with missing contig lines");
|
||||
}
|
||||
} else {
|
||||
createContigDictionary(header.getContigLines());
|
||||
}
|
||||
|
|
@ -213,7 +219,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
final int nAlleles = vc.getNAlleles();
|
||||
final int nInfo = vc.getAttributes().size();
|
||||
final int nGenotypeFormatFields = getNGenotypeFormatFields(vc);
|
||||
final int nSamples = vc.getNSamples();
|
||||
final int nSamples = header.getNGenotypeSamples();
|
||||
|
||||
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32);
|
||||
encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32);
|
||||
|
|
@ -256,10 +262,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
byte[] s = allele.getBases();
|
||||
for ( Allele allele : vc.getAlleles() ) {
|
||||
if ( needsPadding )
|
||||
s = VariantContextUtils.padAllele(vc,allele).getBytes();
|
||||
allele = VariantContextUtils.padAllele(vc,allele);
|
||||
final byte[] s = allele.getDisplayBases();
|
||||
encoder.encodeTypedString(s);
|
||||
}
|
||||
}
|
||||
|
|
@ -298,7 +304,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
Genotype g = vc.getGenotype(name);
|
||||
if ( g == null )
|
||||
// we don't have any data about g at all
|
||||
g = new GenotypeBuilder(name).make();
|
||||
g = new GenotypeBuilder(name).alleles(MISSING_GENOTYPE).make();
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
writer.done(encoder, vc);
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ class IntGenotypeFieldAccessors {
|
|||
public IntGenotypeFieldAccessors() {
|
||||
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new IntGenotypeFieldAccessors.ADAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new IntGenotypeFieldAccessors.PLAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_PL_KEY, new IntGenotypeFieldAccessors.PLAccessor());
|
||||
intGenotypeFieldEncoders.put(VCFConstants.GENOTYPE_QUALITY_KEY, new IntGenotypeFieldAccessors.GQAccessor());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -33,5 +33,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
public enum Options {
|
||||
// NOTE(review): presumably requests that an index be built while the file is written -- confirm against the writer factory
INDEX_ON_THE_FLY,
|
||||
// NOTE(review): presumably makes the writer emit sites only, dropping genotype columns -- confirm against the writer factory
DO_NOT_WRITE_GENOTYPES,
|
||||
// NOTE(review): presumably lets the writer emit INFO/FILTER/FORMAT fields that have no header line instead of failing -- confirm
ALLOW_MISSING_FIELDS_IN_HEADER,
|
||||
// NOTE(review): presumably forces BCF output regardless of how the output is named -- confirm against the writer factory
FORCE_BCF
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,9 +27,9 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -51,15 +51,17 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
// the VCF header we're storing
|
||||
protected VCFHeader mHeader = null;
|
||||
|
||||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
final private boolean allowMissingFieldsInHeader;
|
||||
|
||||
private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
||||
public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||
public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict,
|
||||
final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes,
|
||||
final boolean allowMissingFieldsInHeader ) {
|
||||
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
|
||||
mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size
|
||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||
this.allowMissingFieldsInHeader = allowMissingFieldsInHeader;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
@ -73,13 +75,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
// note we need to update the mHeader object after this call because the header
|
||||
// may have genotypes trimmed out of it, if doNotWriteGenotypes is true
|
||||
mHeader = writeHeader(header, mWriter, doNotWriteGenotypes, getVersionLine(), getStreamName());
|
||||
|
||||
// determine if we use filters, so we should FORCE pass the records
|
||||
// TODO -- this might not be necessary any longer as we have unfiltered, filtered, and PASS VCs
|
||||
for ( final VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFFilterHeaderLine)
|
||||
filtersWereAppliedToContext = true;
|
||||
}
|
||||
}
|
||||
|
||||
public static final String getVersionLine() {
|
||||
|
|
@ -166,7 +161,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
||||
|
||||
try {
|
||||
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
|
||||
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
|
||||
super.add(vc);
|
||||
|
||||
Map<Allele, String> alleleMap = buildAlleleMap(vc);
|
||||
|
|
@ -214,7 +209,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// FILTER
|
||||
String filters = getFilterString(vc, filtersWereAppliedToContext);
|
||||
String filters = getFilterString(vc);
|
||||
mWriter.write(filters);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
|
|
@ -222,6 +217,10 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
Map<String, String> infoFields = new TreeMap<String, String>();
|
||||
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||
String key = field.getKey();
|
||||
|
||||
if ( ! mHeader.hasInfoLine(key) )
|
||||
fieldIsMissingFromHeaderError(vc, key, "INFO");
|
||||
|
||||
String outputValue = formatVCFField(field.getValue());
|
||||
if ( outputValue != null )
|
||||
infoFields.put(key, outputValue);
|
||||
|
|
@ -236,6 +235,10 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
} else {
|
||||
List<String> genotypeAttributeKeys = calcVCFGenotypeKeys(vc, mHeader);
|
||||
if ( ! genotypeAttributeKeys.isEmpty() ) {
|
||||
for ( final String format : genotypeAttributeKeys )
|
||||
if ( ! mHeader.hasFormatLine(format) )
|
||||
fieldIsMissingFromHeaderError(vc, format, "FORMAT");
|
||||
|
||||
final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
|
||||
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
|
@ -270,12 +273,18 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static final String getFilterString(final VariantContext vc) {
|
||||
return getFilterString(vc, false);
|
||||
}
|
||||
private final String getFilterString(final VariantContext vc) {
|
||||
if ( vc.isFiltered() ) {
|
||||
for ( final String filter : vc.getFilters() )
|
||||
if ( ! mHeader.hasFilterLine(filter) )
|
||||
fieldIsMissingFromHeaderError(vc, filter, "FILTER");
|
||||
|
||||
public static final String getFilterString(final VariantContext vc, boolean forcePASS) {
|
||||
return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()));
|
||||
}
|
||||
else if ( vc.filtersWereApplied() )
|
||||
return VCFConstants.PASSES_FILTERS_v4;
|
||||
else
|
||||
return VCFConstants.UNFILTERED;
|
||||
}
|
||||
|
||||
private static final String QUAL_FORMAT_STRING = "%.2f";
|
||||
|
|
@ -330,13 +339,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
*/
|
||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||
throws IOException {
|
||||
if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
|
||||
final List<String> badSampleNames = new ArrayList<String>();
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
|
||||
badSampleNames.add(g.getSampleName());
|
||||
throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
|
||||
}
|
||||
// if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
|
||||
// final List<String> badSampleNames = new ArrayList<String>();
|
||||
// for ( final Genotype g : vc.getGenotypes() )
|
||||
// if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
|
||||
// badSampleNames.add(g.getSampleName());
|
||||
// throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
|
||||
// }
|
||||
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
|
@ -388,7 +397,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
|
||||
// some exceptions
|
||||
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
|
||||
|
|
@ -524,7 +533,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||
if ( sawDP ) keys.add(VCFConstants.DEPTH_KEY);
|
||||
if ( sawAD ) keys.add(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
|
||||
if ( sawPL ) keys.add(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
if ( sawPL ) keys.add(VCFConstants.GENOTYPE_PL_KEY);
|
||||
if ( sawGenotypeFilter ) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
||||
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||
|
|
@ -553,4 +562,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
 * Reports a key that is used by a record being written but is not declared in the VCF header.
 *
 * Does nothing when {@code allowMissingFieldsInHeader} is set; otherwise throws a user-facing
 * error that names the offending key, the VCF field it was found in, and the record's position.
 *
 * @param vc    the record being written; its contig and start position are included in the message
 * @param id    the key that has no corresponding header line
 * @param field the name of the VCF field the key was found in (callers pass "INFO", "FORMAT" or "FILTER")
 * @throws UserException.MalformedVCFHeader if missing header fields are not allowed
 */
private final void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) {
|
||||
if ( !allowMissingFieldsInHeader)
|
||||
throw new UserException.MalformedVCFHeader("Key " + id + " found in VariantContext field " + field
|
||||
+ " at " + vc.getChr() + ":" + vc.getStart()
|
||||
+ " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have"
|
||||
+ " complete VCF headers by default. This error can be disabled with the engine argument"
|
||||
+ " --allowMissingVCFHeaders");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,7 +79,8 @@ public class VariantContextWriterFactory {
|
|||
else {
|
||||
return new VCFWriter(location, output, refDict,
|
||||
options.contains(Options.INDEX_ON_THE_FLY),
|
||||
options.contains(Options.DO_NOT_WRITE_GENOTYPES));
|
||||
options.contains(Options.DO_NOT_WRITE_GENOTYPES),
|
||||
options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
|||
public void unixFileTest() {
|
||||
logger.warn("Executing unixFileTest");
|
||||
|
||||
fastaFile = new File(testDir + "exampleFASTA.fasta");
|
||||
fastaFile = new File(publicTestDir + "exampleFASTA.fasta");
|
||||
builder = new FastaSequenceIndexBuilder(fastaFile, false);
|
||||
FastaSequenceIndex index = builder.createIndex();
|
||||
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
|
||||
|
|
@ -72,7 +72,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
|||
public void windowsFileTest() {
|
||||
logger.warn("Executing windowsFileTest");
|
||||
|
||||
fastaFile = new File(testDir + "exampleFASTA-windows.fasta");
|
||||
fastaFile = new File(publicTestDir + "exampleFASTA-windows.fasta");
|
||||
builder = new FastaSequenceIndexBuilder(fastaFile, false);
|
||||
FastaSequenceIndex index = builder.createIndex();
|
||||
controlIndex.add(new FastaSequenceIndexEntry("chr2", 7, 29, 7, 9,0));
|
||||
|
|
@ -88,7 +88,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
|||
public void combinedWindowsUnix() {
|
||||
logger.warn("Executing combinedWindowsUnix");
|
||||
|
||||
fastaFile = new File(testDir + "exampleFASTA-combined.fasta");
|
||||
fastaFile = new File(publicTestDir + "exampleFASTA-combined.fasta");
|
||||
builder = new FastaSequenceIndexBuilder(fastaFile, false);
|
||||
FastaSequenceIndex index = builder.createIndex();
|
||||
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0));
|
||||
|
|
@ -105,7 +105,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest {
|
|||
public void threeVariableLengthContigs() {
|
||||
logger.warn("Executing threeVariableLengthContigs");
|
||||
|
||||
fastaFile = new File(testDir + "exampleFASTA-3contigs.fasta");
|
||||
fastaFile = new File(publicTestDir + "exampleFASTA-3contigs.fasta");
|
||||
builder = new FastaSequenceIndexBuilder(fastaFile, false);
|
||||
FastaSequenceIndex index = builder.createIndex();
|
||||
controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 17, 5, 6,0));
|
||||
|
|
|
|||
|
|
@ -87,10 +87,13 @@ public abstract class BaseTest {
|
|||
private static final String networkTempDir;
|
||||
private static final File networkTempDirFile;
|
||||
|
||||
protected static final String testDirRelative = "public/testdata/";
|
||||
public static final File testDirFile = new File(testDirRelative);
|
||||
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||
protected static final String testDirRoot = testDir.replace(testDirRelative, "");
|
||||
private static final String privateTestDirRelative = "private/testdata/";
|
||||
public static final String privateTestDir = new File(privateTestDirRelative).getAbsolutePath() + "/";
|
||||
protected static final String privateTestDirRoot = privateTestDir.replace(privateTestDirRelative, "");
|
||||
|
||||
private static final String publicTestDirRelative = "public/testdata/";
|
||||
public static final String publicTestDir = new File(publicTestDirRelative).getAbsolutePath() + "/";
|
||||
protected static final String publicTestDirRoot = publicTestDir.replace(publicTestDirRelative, "");
|
||||
|
||||
public static final String keysDataLocation = validationDataLocation + "keys/";
|
||||
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
|
||||
|
|
@ -277,7 +280,7 @@ public abstract class BaseTest {
|
|||
Reporter.log(message, true);
|
||||
}
|
||||
|
||||
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-4;
|
||||
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
|
||||
|
||||
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
|
||||
Assert.assertTrue(actual instanceof Double);
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public class MD5DB {
|
|||
/**
|
||||
* Subdirectory under the ant build directory where we store integration test md5 results
|
||||
*/
|
||||
private static final int MAX_RECORDS_TO_READ = 100000;
|
||||
private static final int MAX_RECORDS_TO_READ = 1000000;
|
||||
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = -1;
|
||||
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
|
||||
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import java.text.SimpleDateFormat;
|
|||
import java.util.*;
|
||||
|
||||
public class WalkerTest extends BaseTest {
|
||||
private static final boolean GENERATE_SHADOW_BCF = false;
|
||||
private static final boolean GENERATE_SHADOW_BCF = true;
|
||||
private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false;
|
||||
private static final boolean ENABLE_ON_THE_FLY_CHECK_FOR_VCF_INDEX = false;
|
||||
|
||||
|
|
@ -356,7 +356,7 @@ public class WalkerTest extends BaseTest {
|
|||
System.out.println(String.format("[%s] Executing test %s with GATK arguments: %s", now, name, cmdline));
|
||||
// also write the command line to the HTML log for convenient follow-up
|
||||
// do the replaceAll so paths become relative to the current working directory
|
||||
BaseTest.log(cmdline.replaceAll(testDirRoot, ""));
|
||||
BaseTest.log(cmdline.replaceAll(publicTestDirRoot, "").replaceAll(privateTestDirRoot, ""));
|
||||
CommandLineExecutable.start(instance, command);
|
||||
} catch (Exception e) {
|
||||
gotAnException = true;
|
||||
|
|
|
|||
|
|
@ -844,8 +844,8 @@ public class ParsingEngineUnitTest extends BaseTest {
|
|||
Assert.assertEquals(argProvider.bindings.get(1).getName(), "foo2", "Name isn't set properly");
|
||||
}
|
||||
|
||||
private final static String HISEQ_VCF = testDir + "HiSeq.10000.vcf";
|
||||
private final static String TRANCHES_FILE = testDir + "tranches.6.txt";
|
||||
private final static String HISEQ_VCF = privateTestDir + "HiSeq.10000.vcf";
|
||||
private final static String TRANCHES_FILE = privateTestDir + "tranches.6.txt";
|
||||
|
||||
@Test
|
||||
public void variantContextBindingTestDynamicTyping1() {
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
*/
|
||||
@Test
|
||||
public void testSingleBinding() {
|
||||
String fileName = testDir + "TabularDataTest.dat";
|
||||
String fileName = privateTestDir + "TabularDataTest.dat";
|
||||
RMDTriplet triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE,new Tags());
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(triplet,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
*/
|
||||
@Test
|
||||
public void testMultipleBinding() {
|
||||
File file = new File(testDir + "TabularDataTest.dat");
|
||||
File file = new File(privateTestDir + "TabularDataTest.dat");
|
||||
|
||||
RMDTriplet testTriplet1 = new RMDTriplet("tableTest1","Table",file.getAbsolutePath(),RMDStorageType.FILE,new Tags());
|
||||
ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(testTriplet1,builder,seq.getSequenceDictionary(),genomeLocParser,false);
|
||||
|
|
|
|||
|
|
@ -94,13 +94,13 @@ public class GATKBAMIndexUnitTest extends BaseTest {
|
|||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(testDir + "truncated_at_word_boundary.bai"));
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_word_boundary.bai"));
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
@Test( expectedExceptions = UserException.MalformedFile.class )
|
||||
public void testDetectTruncatedBamIndexNonWordBoundary() {
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(testDir + "truncated_at_non_word_boundary.bai"));
|
||||
GATKBAMIndex index = new GATKBAMIndex(new File(privateTestDir + "truncated_at_non_word_boundary.bai"));
|
||||
index.readReferenceSequence(0);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -68,10 +68,10 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest {
|
|||
|
||||
@BeforeMethod
|
||||
public void setUp() {
|
||||
String fileName = testDir + "TabularDataTest.dat";
|
||||
String fileName = privateTestDir + "TabularDataTest.dat";
|
||||
|
||||
// check to see if we have an index, if so, delete it
|
||||
File indexFileName = new File(testDir + "TabularDataTest.dat.idx");
|
||||
File indexFileName = new File(privateTestDir + "TabularDataTest.dat.idx");
|
||||
if (indexFileName.exists()) indexFileName.delete();
|
||||
|
||||
triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE,new Tags());
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class ReferenceOrderedQueryDataPoolUnitTest extends BaseTest{
|
|||
@Test
|
||||
public void testCloseFilePointers() throws IOException {
|
||||
// Build up query parameters
|
||||
File file = new File(BaseTest.validationDataLocation + "NA12878.hg19.example1.vcf");
|
||||
File file = new File(BaseTest.privateTestDir + "NA12878.hg19.example1.vcf");
|
||||
RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags());
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import org.testng.Assert;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -188,7 +187,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
List<String> filterList = new ArrayList<String>();
|
||||
filterList.add(testDir + "readgroupblacklisttest.txt");
|
||||
filterList.add(privateTestDir + "readgroupblacklisttest.txt");
|
||||
|
||||
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
|
||||
int filtered = 0;
|
||||
|
|
@ -227,7 +226,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
List<String> filterList = new ArrayList<String>();
|
||||
filterList.add(testDir + "readgroupblacklisttestlist.txt");
|
||||
filterList.add(privateTestDir + "readgroupblacklisttestlist.txt");
|
||||
|
||||
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
|
||||
int filtered = 0;
|
||||
|
|
|
|||
|
|
@ -52,11 +52,11 @@ import java.util.*;
|
|||
* UnitTests for RMD FeatureManager
|
||||
*/
|
||||
public class FeatureManagerUnitTest extends BaseTest {
|
||||
private static final File RANDOM_FILE = new File(testDir + "exampleGATKReport.eval");
|
||||
private static final File VCF3_FILE = new File(testDir + "vcfexample3.vcf");
|
||||
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
|
||||
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
|
||||
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
|
||||
private static final File RANDOM_FILE = new File(publicTestDir+ "exampleGATKReport.eval");
|
||||
private static final File VCF3_FILE = new File(privateTestDir + "vcf3.vcf");
|
||||
private static final File VCF4_FILE = new File(privateTestDir + "HiSeq.10000.vcf");
|
||||
private static final File VCF4_FILE_GZ = new File(privateTestDir + "HiSeq.10000.vcf.gz");
|
||||
private static final File VCF4_FILE_BGZIP = new File(privateTestDir + "HiSeq.10000.bgzip.vcf.gz");
|
||||
|
||||
private FeatureManager manager;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
|
|||
final String chr = "20";
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||
File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf");
|
||||
File file = new File(privateTestDir + "NA12878.hg19.example1.vcf");
|
||||
VCFCodec codec = new VCFCodec();
|
||||
TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec);
|
||||
CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query(chr, 1, 100000);
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import java.io.PrintStream;
|
|||
public class GATKReportUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testParse() throws Exception {
|
||||
String reportPath = testDir + "exampleGATKReportv2.tbl";
|
||||
String reportPath = publicTestDir + "exampleGATKReportv2.tbl";
|
||||
GATKReport report = new GATKReport(reportPath);
|
||||
Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_1);
|
||||
Assert.assertEquals(report.getTables().size(), 5);
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ import java.util.*;
|
|||
public class SampleDBUnitTest extends BaseTest {
|
||||
private static SampleDBBuilder builder;
|
||||
// all the test sample files are located here
|
||||
private File testPED = new File(testDir + "ceutrio.ped");
|
||||
private File testPED = new File(privateTestDir + "ceutrio.ped");
|
||||
|
||||
private static final Set<Sample> testPEDSamples = new HashSet<Sample>(Arrays.asList(
|
||||
new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
|||
public static String baseTestString(String reference, String VCF) {
|
||||
return "-T CombineVariants" +
|
||||
" -R " + reference +
|
||||
" --variant:vcf " + testDir + VCF +
|
||||
" --variant:vcf " + privateTestDir + VCF +
|
||||
" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" +
|
||||
" -genotypeMergeOptions REQUIRE_UNIQUE" +
|
||||
" -setKey null" +
|
||||
|
|
@ -19,7 +19,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Test(enabled = false)
|
||||
public void test1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
|
||||
|
|
@ -28,7 +28,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
|||
executeTest("Test symbolic alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false)
|
||||
public void test2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R " + hg18Reference +
|
||||
" -T ClipReads " +
|
||||
"-I " + testDir + "clippingReadsTest.withRG.bam " +
|
||||
"-I " + privateTestDir + "clippingReadsTest.withRG.bam " +
|
||||
"-os %s " +
|
||||
"-o %s " + args,
|
||||
2, // two output files: one for -os and one for -o
|
||||
|
|
@ -55,9 +55,9 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
|
|||
@Test public void testClipRange2() { testClipper("clipRange2", "-CT 1-5,11-15", "be4fcad5b666a5540028b774169cbad7", "3061cf742f9e5526a61130128ae761a3"); }
|
||||
|
||||
@Test public void testClipSeq() { testClipper("clipSeqX", "-X CCCCC", "db199bd06561c9f2122f6ffb07941fbc", "b89459f373e40f0b835c1faff2208839"); }
|
||||
@Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + testDir + "seqsToClip.fasta", "d011a3152b31822475afbe0281491f8d", "24e19116ef16a37a6d095ed5c22c2466"); }
|
||||
@Test public void testClipSeqFile() { testClipper("clipSeqXF", "-XF " + privateTestDir + "seqsToClip.fasta", "d011a3152b31822475afbe0281491f8d", "24e19116ef16a37a6d095ed5c22c2466"); }
|
||||
|
||||
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + testDir + "seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "ad8d30300cb43d5e300fcc4d2450da8e"); }
|
||||
@Test public void testClipMulti() { testClipper("clipSeqMulti", "-QT 10 -CT 1-5 -XF " + privateTestDir + "seqsToClip.fasta -X CCCCC", "a23187bd9bfb06557f799706d98441de", "ad8d30300cb43d5e300fcc4d2450da8e"); }
|
||||
|
||||
@Test public void testClipNs() { testClipper("testClipNs", "-QT 10 -CR WRITE_NS", Q10ClipOutput, "57c05b6241db7110148a91fde2d431d0"); }
|
||||
@Test public void testClipQ0s() { testClipper("testClipQs", "-QT 10 -CR WRITE_Q0S", Q10ClipOutput, "2a1a3153e0942ab355fd8a6e082b30e0"); }
|
||||
|
|
@ -68,7 +68,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + hg18Reference +
|
||||
" -T ClipReads" +
|
||||
" -I " + testDir + "originalQuals.chr1.1-1K.bam" +
|
||||
" -I " + privateTestDir + "originalQuals.chr1.1-1K.bam" +
|
||||
" -L chr1:1-1,000" +
|
||||
" -OQ -QT 4 -CR WRITE_Q0S" +
|
||||
" -o %s -os %s",
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class PrintReadsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintReads" +
|
||||
" -R " + params.reference +
|
||||
" -I " + testDir + params.bam +
|
||||
" -I " + privateTestDir + params.bam +
|
||||
params.args +
|
||||
" -o %s",
|
||||
Arrays.asList(params.md5));
|
||||
|
|
|
|||
|
|
@ -15,40 +15,40 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHasAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
|
||||
baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("55785745fe13ad81a2c4a14373d091f0"));
|
||||
executeTest("test file has annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasAnnotsNotAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("9914bd19f6235c550e5182e0f4591da6"));
|
||||
baseTestString() + " --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("d6f749f8dbeb2d42c9effaff9fe571d7"));
|
||||
executeTest("test file has annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6a52ef10bb10d72cdd82a8f7afc2dd09"));
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("9084e6c7b1cec0f3a2c6d96711844d5e"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("74d894fd31b449deffca88d0e465f01b"));
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("3dfabdcaa2648ac34380fb71860c42d3"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("dd89dfa22f0e1d6760095e04f528d62a"));
|
||||
baseTestString() + " --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b85c1ea28194484b327fbe0add1b5685"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -57,96 +57,96 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
// the genotype annotations in this file are actually out of order. If you don't parse the genotypes
|
||||
// they don't get reordered. It's a good test of the genotype ordering system.
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("542d9ed8290ef7868387af4127e0b5fa"));
|
||||
baseTestString() + " --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("fe4d4e2484c4cf8b1cd50ad42cfe468e"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b1b32ed3b831c92c94258c8e4a60e8c9"));
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("043fc6205b0633edcd3fadc9e044800c"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("a25eacb0ceea2c082af349f8d7776c8a"));
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("6fafb42d374a67ba4687a23078a126af"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExcludeAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("ef046909a6f6c6cb43653a255a99a014"));
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("639462a0e0fa79e33def5f011fe55961"));
|
||||
executeTest("test exclude annotations", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("5c2fded3b6a96b0b0788086bbb2409ed"));
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("ebbf32f5b8b8d22f2eb247a0a3db3da0"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("c590088d85edce786604fd600f5d5e75"));
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("afe6c9d3b4b80635a541cdfcfa48db2f"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("ade9354a4cdd6cc92c169f252fb36f3f"));
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("21d696ea8c55d2fd4cbb4dcd5f7f7db6"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleIdsWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3withIDs.vcf -L " + testDir + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("f496f40e1e9efa743e3b473f6fe6e6d3"));
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("ef95394c14d5c16682a322f3dfb9000c"));
|
||||
executeTest("adding multiple IDs with dbSNP", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + testDir + "fakeHM3.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("d383fbd741d604625c9507d4da1c5a27"));
|
||||
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("e6e276b7d517d57626c8409589cd286f"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoQuals() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant " + testDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + testDir + "noQual.vcf -A QualByDepth", 1,
|
||||
Arrays.asList("4a247f039dfb16ac05b38a0dd5f98da6"));
|
||||
baseTestString() + " --variant " + privateTestDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + privateTestDir + "noQual.vcf -A QualByDepth", 1,
|
||||
Arrays.asList("a99e8315571ed1b6bce942451b3d8612"));
|
||||
executeTest("test file doesn't have QUALs", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("067792efcffea93ade632e52a80d0d8f"));
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("7d6ea3b54210620cbc7e14dad8836bcb"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("66c68deb0508348324eb47d524e756de"));
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("35ce4fb0288dfc5c01ec6ce8b14c6157"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
|
|
@ -189,8 +189,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testTDTAnnotation() {
|
||||
final String MD5 = "81f85f0ce8cc36df7c717c478e100ba1";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,
|
||||
Arrays.asList(MD5));
|
||||
executeTest("Testing TDT annotation ", spec);
|
||||
}
|
||||
|
|
@ -200,8 +200,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testChromosomeCountsPed() {
|
||||
final String MD5 = "9830fe2247651377e68ad0b0894e9a4e";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,
|
||||
Arrays.asList(MD5));
|
||||
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
|
||||
}
|
||||
|
|
@ -210,8 +210,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testInbreedingCoeffPed() {
|
||||
final String MD5 = "e94d589b5691e3ecfd9cc9475a384890";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + testDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + testDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + testDir + "ug.random50000.family.ped -o %s", 1,
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + privateTestDir + "ug.random50000.subset300bp.chr1.family.vcf --no_cmdline_in_header -ped " + privateTestDir + "ug.random50000.family.ped -o %s", 1,
|
||||
Arrays.asList(MD5));
|
||||
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import java.util.Arrays;
|
|||
|
||||
public class BeagleIntegrationTest extends WalkerTest {
|
||||
|
||||
private static final String beagleValidationDataLocation = testDir + "/Beagle/";
|
||||
private static final String beagleValidationDataLocation = privateTestDir + "/Beagle/";
|
||||
@Test
|
||||
public void testBeagleOutput() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -41,7 +41,8 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
|
||||
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
|
||||
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
|
||||
"-o %s --no_cmdline_in_header", 1, Arrays.asList("cdbf8cc557f5be9ac778e52338c0d906"));
|
||||
"-o %s --no_cmdline_in_header --allowMissingVCFHeaders", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("test BeagleOutputToVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -50,7 +51,8 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ProduceBeagleInput -R " + hg19Reference + " " +
|
||||
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
|
||||
"-o %s", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
|
||||
"-o %s --allowMissingVCFHeaders", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("test BeagleInput", spec);
|
||||
}
|
||||
|
||||
|
|
@ -59,8 +61,9 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
|
||||
"--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
|
||||
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
|
||||
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 --allowMissingVCFHeaders -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
|
||||
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","4b6417f892ccfe5c63b8a60cb0ef3740"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("test BeagleInputWithBootstrap",spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,8 +75,8 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
|
||||
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
|
||||
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("8c05bda0630155bcd0ebaf155ed5e491"));
|
||||
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header --allowMissingVCFHeaders",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBeagleChangesSitesToRef",spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,10 +52,10 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
|
|||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData() {
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", true, "bf7ef17436a7eccf27be41a9477904f6");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", false, "8ab29169cff232e670db9a4c54fc4358");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
|
||||
new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", true, "aea3d5df32a2acd400da48d06b4dbc60");
|
||||
new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
|
||||
new TestParams(privateTestDir + "diffTestMaster.vcf", privateTestDir + "diffTestTest.vcf", false, "e71e23e7ebfbe768e59527bc62f8918d");
|
||||
new TestParams(publicTestDir + "exampleBAM.bam", publicTestDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
|
||||
return TestParams.getTests(TestParams.class);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -45,8 +45,8 @@ import java.util.*;
|
|||
public class DiffableReaderUnitTest extends BaseTest {
|
||||
DiffEngine engine;
|
||||
|
||||
File vcfFile = new File(testDir + "diffTestMaster.vcf");
|
||||
File bamFile = new File(testDir + "exampleBAM.bam");
|
||||
File vcfFile = new File(privateTestDir + "diffTestMaster.vcf");
|
||||
File bamFile = new File(publicTestDir + "exampleBAM.bam");
|
||||
|
||||
@BeforeClass(enabled = true)
|
||||
public void createDiffEngine() {
|
||||
|
|
@ -97,7 +97,7 @@ public class DiffableReaderUnitTest extends BaseTest {
|
|||
testLeaf(rec1, "REF", Allele.create("G", true));
|
||||
testLeaf(rec1, "ALT", Arrays.asList(Allele.create("A")));
|
||||
testLeaf(rec1, "QUAL", 0.15);
|
||||
testLeaf(rec1, "FILTER", Collections.<Object>emptySet());
|
||||
testLeaf(rec1, "FILTER", VCFConstants.PASSES_FILTERS_v4);
|
||||
testLeaf(rec1, "AC", "2");
|
||||
testLeaf(rec1, "AF", "1.00");
|
||||
testLeaf(rec1, "AN", "2");
|
||||
|
|
|
|||
|
|
@ -15,88 +15,88 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testNoAction() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
|
||||
baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("fbf88e25df30181ca5422a374c7b36fa"));
|
||||
executeTest("test no action", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClusteredSnps() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -window 10 --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4a4596929f9fe983d8868ca142567781"));
|
||||
baseTestString() + " -window 10 --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("bb69f49e9ef0054f0ccd6d38f5ffa46a"));
|
||||
executeTest("test clustered SNPs", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMask1() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo --mask:VCF3 " + testDir + "vcfexample2.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("1719462cd17986c33e59e45b69df0270"));
|
||||
baseTestString() + " -maskName foo --mask " + privateTestDir + "vcfexample2.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("7e3225a32fcd6066901247992b2c5ca8"));
|
||||
executeTest("test mask all", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMask2() {
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("db19ff7d90c82cda09fb3c3878100eb5"));
|
||||
baseTestString() + " -maskName foo --mask:VCF " + privateTestDir + "vcfMask.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("3485fe95e3f0864c3575baf05cef4bcc"));
|
||||
executeTest("test mask some", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMask3() {
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a9e417cba21585c786d4b9930265ea31"));
|
||||
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + privateTestDir + "vcfMask.vcf --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("367ab9c028a68e4eda2055e3bb8b486c"));
|
||||
executeTest("test mask extend", spec3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4160904b180d1f62a6bf50de6728ce00"));
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("5a10d969e50a58d8dfbf1da54bf293df"));
|
||||
executeTest("test filter #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("df80db30c7836731ac7c8c3d4fc005b4"));
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("886dbbca2350083819ff67224f6efbd6"));
|
||||
executeTest("test filter #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilterWithSeparateNames() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("71ce6c0952831cb68f575aa0173dce2b"));
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("ee78c2e7128a8f9549233493c7cf6949"));
|
||||
executeTest("test filter with separate names #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypeFilters1() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("179f7f2a90c0e6c656109aac9b775476"));
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("285dd348c47c8c1e85d2886f9b33559e"));
|
||||
executeTest("test genotype filter #1", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypeFilters2() {
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("22e07c27feb9017a130dfb045c5b29b9"));
|
||||
baseTestString() + " -G_filter 'isHomVar == 1' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a9c835a13eb72aa22d5e271894d8ac33"));
|
||||
executeTest("test genotype filter #2", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeletions() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + testDir + "twoDeletions.vcf", 1,
|
||||
Arrays.asList("637256ee5348c1c57f1dadf581b06ed9"));
|
||||
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + privateTestDir + "twoDeletions.vcf", 1,
|
||||
Arrays.asList("a1c02a5a90f1262e9eb3d2cad1fd08f2"));
|
||||
executeTest("test deletions", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,23 +28,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("1c6ea045819b151bcd9d98947c5d4c4d"));
|
||||
Arrays.asList("f98c38defc8d619609399b4a3ba874e8"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("c09dfbfc5b76acacb616730eaa83a150"));
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("6f0c49b76225e2099c74015b6f79c96d"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("c51d037e0b1cd0ed3a1cd6c6b29646cf"));
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("42c4e37e0f130dc796231003638a197c"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -52,23 +52,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("0a085eac119c91d63fdd4a7e9a5e45af"));
|
||||
Arrays.asList("736607ee529b5624a3ab5521ab9e1b35"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("bdbb67743c9f75ac60d0a10f94856361"));
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("f33507add5d5c30448948906467dd3f3"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("bf60763a6e9c9d3987cfbac43b941a48"));
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("d915535c1458733f09f82670092fcab6"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("1e991a6a7288be7ac603ef6467fb1ac2"));
|
||||
Arrays.asList("0d724551e00129730b95fd4d70faaa58"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "3136826ec99366b0285b278aba35cec1";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "fe3429b736c50bb770e40c0320d498ed";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -107,7 +107,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "7824468b8290ffb7795a1ec3e493c1a4";
|
||||
String md5 = "306943dd63111e2e64388cd2e2de6c01";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -139,7 +139,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("86121f5094f26c8b2e320c1f5dea4ae3"));
|
||||
Arrays.asList("b341b87742848a3224115fe94e73f244"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -147,7 +147,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("fe4a96f0049edd466c030def4c62a224"));
|
||||
Arrays.asList("acb5332a267927d78edd51d93685111c"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -155,7 +155,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("ca8d0d91fd0cef93d4a606dec84a7986"));
|
||||
Arrays.asList("74779b59730962bdf36a7a8ef84ac24d"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -163,23 +163,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("7c3518d356c05c6b9a8918357c260bfe"));
|
||||
Arrays.asList("036edf58a4ed6c626f53bd2ab34b9f97"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "fe204cef499e5aceb2732ba2e45903ad");
|
||||
testOutputParameters("-sites_only", "52b8336f347d182c158e8384b78f5a6d");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1ab8b68891d1531923a40d594250e8e0");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "281363e6afb3260143bfdb22710e3d0e");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "ab179ef6ece3ab9e6b1ff5800cb89ebd");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "a802b672850b6fbc2764611d3ad071d9");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -193,7 +193,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("a19c6195211b0ff036c746c7e11490ed"));
|
||||
Arrays.asList("99ef7ba1747c7289ce1f963130539e18"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -201,7 +201,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("3fc3c36edaac133b4c11b20a5af915c4"));
|
||||
Arrays.asList("99ef7ba1747c7289ce1f963130539e18"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -212,12 +212,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "82caf6c25d3aeabf7978016474e04fd0" );
|
||||
testHeterozosity( 0.01, "7e1681b9052e357ca4a065fa76c8afb6" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "d2a7ba1fa2d1a4153f685f3b3f6d55a2" );
|
||||
testHeterozosity( 1.0 / 1850, "68a12f3eccac6cf4b27b6424f23628ee" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -241,7 +241,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("b574087efc5b259f69c429f1f415da0a"));
|
||||
Arrays.asList("b098a7744a448cf91a50886e4cc7d268"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -260,7 +260,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("1b9556725b6a2cb52ad6745e9eca37e6"));
|
||||
Arrays.asList("2f008169b82d542ec9cc94908c395a0f"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -279,7 +279,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("9388a1216957c4722fe54af06a05f242"));
|
||||
Arrays.asList("0d4177d7f963f4b4e8568613e7a468f0"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("8f942000baaf522fcea29691fe5ef75d"));
|
||||
Arrays.asList("1268bde77842e6bb6a4f337c1d589f4d"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -307,7 +307,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("4b8822ccc9ac04bee37bf0c9922108f9"));
|
||||
Arrays.asList("181c4ed8dd084b83f8de92123bb85c41"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -315,9 +315,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testWithIndelAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("13160041d8ebfb2080981f89e39eeb4f"));
|
||||
Arrays.asList("5250cefb1fff262a6a3985dee29c154d"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -325,9 +325,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithIndelAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("59e874d76e42eafd98ad961eb70706bc"));
|
||||
Arrays.asList("c9b468fe75e7215a6d6d5a050af07918"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -335,13 +335,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("eaef9cc984a95b5ccb4c4c1f7c20c235"));
|
||||
Arrays.asList("01fd223deb4f88fb7d9ee9736b664d8a"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("b4df2bf0d820c6fc11fabcafe18bb769"));
|
||||
Arrays.asList("c48c8a1a8ec88c6f3c99187e08496ae0"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -349,9 +349,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testGGAwithNoEvidenceInReads() {
|
||||
final String vcf = "small.indel.test.vcf";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation +
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("95226301a014347efc90e5f750a0db60"));
|
||||
Arrays.asList("db0f91abb901e097714d8755058e1319"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -384,7 +384,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("a3ea0eea74f2031ebb2ea0edfa14c945"));
|
||||
Arrays.asList("25465c6dd3c4845f61b0f8e383388824"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -392,7 +392,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("a3741b9de95e5858640220d62a0d318c"));
|
||||
Arrays.asList("aa58dc9f77132c30363562bcdc321f6e"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -400,7 +400,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("c1911f6ede7b4e8e83209ead66329596"));
|
||||
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testKnownsOnly() {
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + testDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L " + testDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -o %s",
|
||||
"-T RealignerTargetCreator -R " + b36KGReference + " --known " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -L " + privateTestDir + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -o %s",
|
||||
1,
|
||||
Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96"));
|
||||
executeTest("test rods only", spec3);
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.testng.annotations.Test;
|
|||
import java.util.Arrays;
|
||||
|
||||
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
||||
private static String phaseByTransmissionTestDataRoot = testDir + "PhaseByTransmission/";
|
||||
private static String phaseByTransmissionTestDataRoot = privateTestDir + "PhaseByTransmission/";
|
||||
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
|
||||
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
|
||||
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
return "-T ReadBackedPhasing" +
|
||||
" -R " + reference +
|
||||
" -I " + validationDataLocation + reads +
|
||||
" --variant " + validationDataLocation + VCF +
|
||||
" --variant " + ( VCF.contains("phasing_test") ? privateTestDir : validationDataLocation) + VCF +
|
||||
" --cacheWindowSize " + cacheWindowSize +
|
||||
" --maxPhaseSites " + maxPhaseSites +
|
||||
" --phaseQualityThresh " + phaseQualityThresh +
|
||||
|
|
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba"));
|
||||
Arrays.asList("442c819569417c1b7d6be9f41ce05394"));
|
||||
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:1232503-1332503",
|
||||
1,
|
||||
Arrays.asList("f7517896c899a872c24d8e823ac9deae"));
|
||||
Arrays.asList("2a51ee7d3c024f2410dcee40c5412993"));
|
||||
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c"));
|
||||
Arrays.asList("85bc9b03e24159f746dbd0cb988f9ec8"));
|
||||
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59"));
|
||||
Arrays.asList("96bb413a83c777ebbe622438e4565e8f"));
|
||||
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
|
||||
+ " -L chr20:332341-482503",
|
||||
1,
|
||||
Arrays.asList("6163a1fba27532da77765a7a11c55332"));
|
||||
Arrays.asList("7d2402f055d243e2208db9ea47973e13"));
|
||||
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:652810-681757",
|
||||
1,
|
||||
Arrays.asList("61a7d05f9eb4317cf0e6937d72e1e7ec"));
|
||||
Arrays.asList("72682b3f27c33580d2d4515653ba6de7"));
|
||||
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -271,7 +271,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -knownSites:anyNameABCD,VCF3 " + testDir + "vcfexample3.vcf" +
|
||||
" -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
|
|
|
|||
|
|
@ -17,8 +17,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled=true)
|
||||
public void testWikiExample() {
|
||||
String siteVCF = validationDataLocation + "sites_to_validate.vcf";
|
||||
String maskVCF = testDir + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = testDir + "amplicon_interval_table1.table";
|
||||
String maskVCF = privateTestDir + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = privateTestDir + "amplicon_interval_table1.table";
|
||||
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
|
||||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30";
|
||||
|
|
@ -29,9 +29,9 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled=true)
|
||||
public void testWikiExampleNoBWA() {
|
||||
String siteVCF = testDir + "sites_to_validate.vcf";
|
||||
String maskVCF = testDir + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = testDir + "amplicon_interval_table1.table";
|
||||
String siteVCF = privateTestDir + "sites_to_validate.vcf";
|
||||
String maskVCF = privateTestDir + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = privateTestDir + "amplicon_interval_table1.table";
|
||||
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
|
||||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30 --doNotUseBWA";
|
||||
|
|
@ -42,9 +42,9 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled=true)
|
||||
public void testWikiExampleMonoFilter() {
|
||||
String siteVCF = testDir + "sites_to_validate.vcf";
|
||||
String maskVCF = testDir + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = testDir + "amplicon_interval_table1.table";
|
||||
String siteVCF = privateTestDir + "sites_to_validate.vcf";
|
||||
String maskVCF = privateTestDir + "amplicon_mask_sites.vcf";
|
||||
String intervalTable = privateTestDir + "amplicon_interval_table1.table";
|
||||
String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s";
|
||||
testArgs += " --ProbeIntervals:table "+intervalTable+" -L:table "+intervalTable+" --MaskAlleles:VCF "+maskVCF;
|
||||
testArgs += " --virtualPrimerSize 30 --filterMonomorphic";
|
||||
|
|
|
|||
|
|
@ -303,7 +303,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
String tests = cmdRoot +
|
||||
" --dbsnp " + b36dbSNP129 +
|
||||
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
||||
" --comp:comp_genotypes " + testDir + "yri.trio.gatk.ug.head.vcf";
|
||||
" --comp:comp_genotypes " + privateTestDir + "yri.trio.gatk.ug.head.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||
1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
|
|
@ -343,7 +343,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = false) // no longer supported in the GATK
|
||||
public void testTranches() {
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + privateTestDir + "tranches.6.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
|
||||
executeTestParallel("testTranches",spec);
|
||||
}
|
||||
|
|
@ -530,11 +530,11 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-eval " + testDir + "/withSymbolic.b37.vcf",
|
||||
"-eval " + privateTestDir + "/withSymbolic.b37.vcf",
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-stratIntervals " + testDir + "/overlapTest.bed",
|
||||
"-stratIntervals " + privateTestDir + "/overlapTest.bed",
|
||||
"-ST IntervalStratification",
|
||||
"-L 20",
|
||||
"-o %s"
|
||||
|
|
@ -602,7 +602,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-eval " + testDir + "/ac0.vcf",
|
||||
"-eval " + privateTestDir + "/ac0.vcf",
|
||||
"-L 20:81006 -noST -noEV -EV VariantSummary -o %s" + (includeAC0 ? " -keepAC0" : "")
|
||||
),
|
||||
1,
|
||||
|
|
|
|||
|
|
@ -45,10 +45,10 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
private static int N_VARIANTS = 100;
|
||||
VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS];
|
||||
|
||||
private final File QUAL_DATA = new File(testDir + "tranches.raw.dat");
|
||||
private final File QUAL_DATA = new File(privateTestDir + "tranches.raw.dat");
|
||||
private final double[] TRUTH_SENSITIVITY_CUTS = new double[]{99.9, 99.0, 97.0, 95.0};
|
||||
private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt");
|
||||
private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt");
|
||||
private final File EXPECTED_TRANCHES_NEW = new File(privateTestDir + "tranches.6.txt");
|
||||
private final File EXPECTED_TRANCHES_OLD = new File(privateTestDir + "tranches.4.txt");
|
||||
|
||||
private ArrayList<VariantDatum> readData() {
|
||||
ArrayList<VariantDatum> vd = new ArrayList<VariantDatum>();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.MD5DB;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -27,7 +26,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
|
||||
"6e1f98bb819ccf03e17a2288742160d3", // recal file
|
||||
"b9709e4180e56abc691b208bd3e8626c", // recal file
|
||||
"c58ff4140e8914f0b656ed625c7f73b9"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
|
|
@ -75,7 +74,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
|
||||
"8e2417336fa62e6c4d9f61b6deebdd82", // recal file
|
||||
"a04a9001f62eff43d363f4d63769f3ee", // recal file
|
||||
"05e88052e0798f1c1e83f0a8938bce56"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRIndelTest")
|
||||
|
|
@ -129,11 +128,11 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -L 20:1000100-1000500" +
|
||||
" -mode BOTH" +
|
||||
" --no_cmdline_in_header" +
|
||||
" -input " + testDir + "VQSR.mixedTest.input" +
|
||||
" -input " + privateTestDir + "VQSR.mixedTest.input" +
|
||||
" -o %s" +
|
||||
" -tranchesFile " + testDir + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + testDir + "VQSR.mixedTest.recal",
|
||||
Arrays.asList("1370d7701a6231633d43a8062b7aff7f"));
|
||||
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
|
||||
Arrays.asList("d670c684f73e2744b6c01738a01d5ec4"));
|
||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,8 +34,23 @@ import java.util.Arrays;
|
|||
* Tests CombineVariants
|
||||
*/
|
||||
public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||
public static String baseTestString(String args) {
|
||||
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
|
||||
//
|
||||
// TODO TODO TODO TODO TODO TODO TODO TODO
|
||||
// TODO TODO TODO TODO TODO TODO TODO TODO
|
||||
//
|
||||
// TODO WHEN THE HC EMITS VALID VCF HEADERS ENABLE BCF AND REMOVE allowMissingVCFHeaders ARGUMENTS
|
||||
//
|
||||
// TODO TODO TODO TODO TODO TODO TODO TODO
|
||||
// TODO TODO TODO TODO TODO TODO TODO TODO
|
||||
// TODO TODO TODO TODO TODO TODO TODO TODO
|
||||
//
|
||||
private static String baseTestString(String args) {
|
||||
return "-T CombineVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s --allowMissingVCFHeaders -R " + b36KGReference + args;
|
||||
}
|
||||
|
||||
private void cvExecuteTest(final String name, final WalkerTestSpec spec) {
|
||||
spec.disableShadowBCF();
|
||||
executeTest(name, spec);
|
||||
}
|
||||
|
||||
public void test1InOut(String file, String md5) {
|
||||
|
|
@ -47,7 +62,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("testInOut1--" + file, spec);
|
||||
cvExecuteTest("testInOut1--" + file, spec);
|
||||
}
|
||||
|
||||
public void combine2(String file1, String file2, String args, String md5) {
|
||||
|
|
@ -55,7 +70,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args),
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
cvExecuteTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
}
|
||||
|
||||
public void combineSites(String args, String md5) {
|
||||
|
|
@ -67,15 +82,15 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
+ " -V:hm3 " + validationDataLocation + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
cvExecuteTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
public void combinePLs(String file1, String file2, String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 " + validationDataLocation + file2,
|
||||
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + privateTestDir + file1 + " -V:v2 " + privateTestDir + file2,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
cvExecuteTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
}
|
||||
|
||||
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "6469fce8a5cd5a0f77e5ac5d9e9e192b"); }
|
||||
|
|
@ -86,7 +101,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "ef2d249ea4b25311966e038aac05c661"); }
|
||||
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "cdb448aaa92ca5a9e393d875b42581b3"); }
|
||||
|
||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "284083f60792c5f817899445dfa63a42"); }
|
||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "f0ce3fb83d4ad9ba402d7cb11cd000c3"); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "4efdf983918db822e4ac13d911509576"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "848d4408ee953053d2307cefebc6bd6d"); } // official project VCF files in tabix format
|
||||
|
|
@ -110,8 +125,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("c0d4d601aa5d2b29927c535868448d2a"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
Arrays.asList("3039cfff7abee6aa7fbbafec66a1b019"));
|
||||
cvExecuteTest("threeWayWithRefs", spec);
|
||||
}
|
||||
|
||||
// complex examples with filtering, indels, and multiple alleles
|
||||
|
|
@ -120,17 +135,17 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
String file2 = "combine.2.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b37KGReference
|
||||
+ " -V:one " + validationDataLocation + file1
|
||||
+ " -V:two " + validationDataLocation + file2 + args,
|
||||
+ " -V:one " + privateTestDir + file1
|
||||
+ " -V:two " + privateTestDir + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
cvExecuteTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
}
|
||||
|
||||
@Test public void complexTestFull() { combineComplexSites("", "7d587bf49bbc9f8239476bab84bf9708"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "4d1e0c12d95f50e472493fc14af3cc06"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "9a98b01b9b2a28ae6af3125edc131dea"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "9a98b01b9b2a28ae6af3125edc131dea"); }
|
||||
@Test public void complexTestFull() { combineComplexSites("", "8b19b54516b59de40992f0c4b328258a"); }
|
||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "a38dd097adc37420fe36ef8be14cfded"); }
|
||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "a3957dac9a617f50ce2668607e3baef0"); }
|
||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "a3957dac9a617f50ce2668607e3baef0"); }
|
||||
|
||||
@Test
|
||||
public void combineDBSNPDuplicateSites() {
|
||||
|
|
@ -138,6 +153,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
"-T CombineVariants --no_cmdline_in_header -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||
1,
|
||||
Arrays.asList("3d2a5a43db86e3f6217ed2a63251285b"));
|
||||
executeTest("combineDBSNPDuplicateSites:", spec);
|
||||
cvExecuteTest("combineDBSNPDuplicateSites:", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -38,7 +38,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testLeftAlignment() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + testDir + "forLeftAlignVariantsTest.vcf --no_cmdline_in_header",
|
||||
"-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "forLeftAlignVariantsTest.vcf --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("bcf05f56adbb32a47b6d6b27b327d5c2"));
|
||||
executeTest("test left alignment", spec);
|
||||
|
|
|
|||
|
|
@ -38,27 +38,27 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testb36Tohg19() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + privateTestDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
|
||||
Arrays.asList("a139480c004859452d4095fe4859b42e"));
|
||||
executeTest("test b36 to hg19", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testb36Tohg19UnsortedSamples() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + privateTestDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010"));
|
||||
Arrays.asList("91344768f1e98c979364ec0d5d3aa9d6"));
|
||||
executeTest("test b36 to hg19, unsorted samples", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testhg18Tohg19Unsorted() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + testDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
"-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + privateTestDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b"));
|
||||
Arrays.asList("e0b813ff873185ab51995a151f80ec98"));
|
||||
executeTest("test hg18 to hg19, unsorted", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -12,12 +13,14 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testDiscordanceNoSampleSpecified() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
String testFile = privateTestDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
"-T SelectVariants -R " + hg19Reference + " -L 20:1012700-1020000 --variant "
|
||||
+ b37hapmapGenotypes + " -disc " + testFile
|
||||
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders --allowMissingVCFHeaders",
|
||||
1,
|
||||
Arrays.asList("133fd0ded0bb213097cbe68995afbb7e")
|
||||
Arrays.asList("d88bdae45ae0e74e8d8fd196627e612c")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -26,12 +29,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testRepeatedLineSelection() {
|
||||
String testfile = testDir + "test.dup.vcf";
|
||||
String testfile = privateTestDir + "test.dup.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("b2ee12588ebda200727762a903b8c972")
|
||||
Arrays.asList("337bb7fc23153cf67acc42a466834775")
|
||||
);
|
||||
|
||||
executeTest("testRepeatedLineSelection--" + testfile, spec);
|
||||
|
|
@ -39,12 +42,14 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testDiscordance() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
String testFile = privateTestDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s --no_cmdline_in_header",
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant "
|
||||
+ b37hapmapGenotypes + " -disc " + testFile
|
||||
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders",
|
||||
1,
|
||||
Arrays.asList("f64c90c4cca470f1095d9fa2062eac3e")
|
||||
Arrays.asList("54289033d35d32b8ebbb38c51fbb614c")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -57,9 +62,9 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("ad0514b723ee1479d861291622bd4311")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
|
|
@ -71,9 +76,9 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("b24f31db48d254d8fe15295955173486")
|
||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("bc0e00d0629b2bc6799e7e9db0dc775c")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -83,12 +88,14 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testConcordance() {
|
||||
String testFile = testDir + "NA12878.hg19.example1.vcf";
|
||||
String testFile = privateTestDir + "NA12878.hg19.example1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc "
|
||||
+ b37hapmapGenotypes + " --variant " + testFile
|
||||
+ " -o %s --no_cmdline_in_header --allowMissingVCFHeaders",
|
||||
1,
|
||||
Arrays.asList("9da5dab3d344c1c0a5987b15e60fa082")
|
||||
Arrays.asList("946e7f2e0ae08dc0e931c1634360fc46")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -97,12 +104,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testVariantTypeSelection() {
|
||||
String testFile = testDir + "complexExample1.vcf";
|
||||
String testFile = privateTestDir + "complexExample1.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -restrictAllelesTo MULTIALLELIC -selectType MIXED --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("30b89b3a6706f7f46b23bfb3be69cc8e")
|
||||
Arrays.asList("a111642779b05de33ad04073d6022c21")
|
||||
);
|
||||
|
||||
executeTest("testVariantTypeSelection--" + testFile, spec);
|
||||
|
|
@ -110,12 +117,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testUsingDbsnpName() {
|
||||
String testFile = testDir + "combine.3.vcf";
|
||||
String testFile = privateTestDir + "combine.3.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("8bf557aaa07eccb294c81f491225bf9e")
|
||||
Arrays.asList("d12ae1617deb38f5ed712dc326935b9a")
|
||||
);
|
||||
|
||||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
|
|
@ -123,12 +130,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testRegenotype() {
|
||||
String testFile = testDir + "combine.3.vcf";
|
||||
String testFile = privateTestDir + "combine.3.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("be38bdc7bd88f5d09cf1a9d55cfecb0b")
|
||||
Arrays.asList("c22ad8864d9951403672a24c20d6c3c2")
|
||||
);
|
||||
|
||||
executeTest("testRegenotype--" + testFile, spec);
|
||||
|
|
@ -136,12 +143,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testMultipleRecordsAtOnePosition() {
|
||||
String testFile = testDir + "selectVariants.onePosition.vcf";
|
||||
String testFile = privateTestDir + "selectVariants.onePosition.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("cb9932f9a7aa2e53af605b30d88ad43f")
|
||||
Arrays.asList("44f7c47395ca5b2afef5313f592c8cea")
|
||||
);
|
||||
|
||||
executeTest("testMultipleRecordsAtOnePosition--" + testFile, spec);
|
||||
|
|
@ -149,12 +156,12 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testNoGTs() {
|
||||
String testFile = testDir + "vcf4.1.example.vcf";
|
||||
String testFile = privateTestDir + "vcf4.1.example.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("920605cc2182026e3f54c009f6a04141")
|
||||
Arrays.asList("a0b7f77edc16df0992d2c1363136a17e")
|
||||
);
|
||||
|
||||
executeTest("testNoGTs--" + testFile, spec);
|
||||
|
|
@ -167,9 +174,9 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec;
|
||||
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
1,
|
||||
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
1,
|
||||
Arrays.asList("ad0514b723ee1479d861291622bd4311")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||
|
|
@ -177,13 +184,13 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = false)
|
||||
public void testParallelization4() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
WalkerTestSpec spec;
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
1,
|
||||
Arrays.asList("446eea62630bc5325ffab30b9b9fbfe4")
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||
WalkerTestSpec spec;
|
||||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
1,
|
||||
Arrays.asList("ad0514b723ee1479d861291622bd4311")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -192,13 +199,37 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testSelectFromMultiAllelic() {
|
||||
String testfile = testDir + "multi-allelic.bi-allelicInGIH.vcf";
|
||||
String samplesFile = testDir + "GIH.samples.list";
|
||||
String testfile = privateTestDir + "multi-allelic.bi-allelicInGIH.vcf";
|
||||
String samplesFile = privateTestDir + "GIH.samples.list";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("2f2a342812ba914bcce666e42ef761d7")
|
||||
Arrays.asList("9acd6effcc78bfb832bed5edfd6a1b5b")
|
||||
);
|
||||
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testFileWithoutInfoLineInHeader() {
|
||||
testFileWithoutInfoLineInHeader("testFileWithoutInfoLineInHeader", UserException.class);
|
||||
}
|
||||
|
||||
@Test()
|
||||
public void testFileWithoutInfoLineInHeaderWithOverride() {
|
||||
testFileWithoutInfoLineInHeader("testFileWithoutInfoLineInHeaderWithOverride", null);
|
||||
}
|
||||
|
||||
private void testFileWithoutInfoLineInHeader(final String name, final Class expectedException) {
|
||||
final String testFile = privateTestDir + "missingHeaderLine.vcf";
|
||||
final String cmd = "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp "
|
||||
+ testFile + " -o %s --no_cmdline_in_header"
|
||||
+ (expectedException == null ? " -allowMissingVCFHeaders" : "");
|
||||
WalkerTestSpec spec =
|
||||
expectedException != null
|
||||
? new WalkerTestSpec(cmd, 1, expectedException)
|
||||
: new WalkerTestSpec(cmd, 1, Arrays.asList(""));
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest(name, spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
|
||||
|
||||
// Copy VCF data from the test file into the FIFO.
|
||||
String testFile = testDir + "yri.trio.gatk.ug.head.vcf";
|
||||
String testFile = privateTestDir + "yri.trio.gatk.ug.head.vcf";
|
||||
FileInputStream inputStream = new FileInputStream(testFile);
|
||||
FileOutputStream outputStream = new FileOutputStream(tmpFifo);
|
||||
outputStream.getChannel().transferFrom(inputStream.getChannel(),0,inputStream.getChannel().size());
|
||||
|
|
@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
" --no_cmdline_in_header " +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("b532a20b5af4e8ea7a073888976c71ba")
|
||||
Arrays.asList("2cdcd9e140eb1b6da7e365e37dd7d859")
|
||||
);
|
||||
|
||||
executeTest("testSimpleVCFStreaming", spec);
|
||||
|
|
@ -74,7 +74,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
File tmpFifo = File.createTempFile("vcfstreaming","");
|
||||
Runtime.getRuntime().exec(new String[] {"mkfifo",tmpFifo.getAbsolutePath()});
|
||||
|
||||
String testFile = testDir + "yri.trio.gatk.ug.head.vcf";
|
||||
String testFile = privateTestDir + "yri.trio.gatk.ug.head.vcf";
|
||||
|
||||
// Output select to FIFO
|
||||
WalkerTestSpec selectTestSpec = new WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import java.util.Arrays;
|
|||
public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||
|
||||
public static String baseTestString(String file, String type) {
|
||||
return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + testDir + file + " --validationType " + type;
|
||||
return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type;
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import java.util.*;
|
|||
public class VariantsToTableIntegrationTest extends WalkerTest {
|
||||
private String variantsToTableCmd(String moreArgs) {
|
||||
return "-R " + hg18Reference +
|
||||
" --variant:vcf " + testDir + "soap_gatk_annotated.vcf" +
|
||||
" --variant:vcf " + privateTestDir + "soap_gatk_annotated.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" +
|
||||
" -L chr1 -o %s" + moreArgs;
|
||||
|
|
@ -41,7 +41,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
|
||||
private String variantsToTableMultiAllelicCmd(String moreArgs) {
|
||||
return "-R " + b37KGReference +
|
||||
" --variant " + testDir + "multiallelic.vcf" +
|
||||
" --variant " + privateTestDir + "multiallelic.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F MULTI-ALLELIC -F AC -F AF" +
|
||||
" -o %s" + moreArgs;
|
||||
|
|
@ -78,7 +78,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeFields() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant " + testDir + "vcfexample2.vcf" +
|
||||
" --variant " + privateTestDir + "vcfexample2.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -GF RD" +
|
||||
" -o %s",
|
||||
|
|
@ -91,7 +91,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeFieldsWithInline() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant " + testDir + "vcfexample2.vcf" +
|
||||
" --variant " + privateTestDir + "vcfexample2.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -GF RD -GF GT -GF GQ" +
|
||||
" -o %s",
|
||||
|
|
@ -104,7 +104,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
public void testMoltenOutput() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant " + testDir + "vcfexample2.vcf" +
|
||||
" --variant " + privateTestDir + "vcfexample2.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER" +
|
||||
" --moltenize" +
|
||||
|
|
@ -118,7 +118,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
public void testMoltenOutputWithGenotypeFields() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant " + testDir + "vcfexample2.vcf" +
|
||||
" --variant " + privateTestDir + "vcfexample2.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -GF RD" +
|
||||
" --moltenize" +
|
||||
|
|
@ -132,7 +132,7 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
|
|||
public void testMoltenOutputWithMultipleAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b37KGReference +
|
||||
" --variant " + testDir + "multiallelic.vcf" +
|
||||
" --variant " + privateTestDir + "multiallelic.vcf" +
|
||||
" -T VariantsToTable" +
|
||||
" -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F MULTI-ALLELIC -F AC -F AF" +
|
||||
" --moltenize -SMA" +
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingDbsnpInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("268c116f825c2a4b5200a416ca587adc");
|
||||
md5.add("72e6ce7aff7dec7ca9e7580be7ddd435");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -36,7 +36,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("82ca5ecef2df5d64dee9ef5a4b14ef2f");
|
||||
md5.add("22373883afa2221b5a4f75a50f30f26b");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -54,7 +54,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("90bc2e21d633fa6c3c47c6bd86c134a0");
|
||||
md5.add("738eb66dbc400dcd1786cd9e49902e8c");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -72,7 +72,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingHapMapInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("bb71dabd072a679cc85fe8d3e130fb2b");
|
||||
md5.add("67656672acc264156f5a3e01e5cac61a");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
|
|
@ -89,11 +89,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingVCFInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("ae39e2249bc20fcd0a668a7fe5fb84b0");
|
||||
md5.add("95898aad8c9f9515c0e668e2fb65a024");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant:VCF " + testDir + "complexExample.vcf4" +
|
||||
" --variant:VCF " + privateTestDir + "complexExample.vcf4" +
|
||||
" -T VariantsToVCF" +
|
||||
" -o %s" +
|
||||
" --no_cmdline_in_header",
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ import java.util.List;
|
|||
public class ReadClipperUnitTest extends BaseTest {
|
||||
|
||||
List<Cigar> cigarList;
|
||||
int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
|
||||
int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
|
|
@ -92,22 +92,15 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
int start = read.getSoftStart();
|
||||
int stop = read.getSoftEnd();
|
||||
|
||||
// System.out.println(String.format("CIGAR: %s (%d, %d)", cigar.toString(), start, stop));
|
||||
|
||||
// if (ReadUtils.readIsEntirelyInsertion(read))
|
||||
// System.out.println("debug");
|
||||
|
||||
for (int i = start; i <= stop; i++) {
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
|
||||
if (!clipLeft.isEmpty()) {
|
||||
// System.out.println(String.format("\t left [%d] %s -> %s ", i-start+1, cigar.toString(), clipLeft.getCigarString()));
|
||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||
assertUnclippedLimits(read, clipLeft);
|
||||
}
|
||||
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
// System.out.println(String.format("\t right [%d] %s -> %s ", i-start+1, cigar.toString(), clipRight.getCigarString()));
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||
assertUnclippedLimits(read, clipRight);
|
||||
}
|
||||
|
|
@ -121,7 +114,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
for (int i = alnStart; i <= alnEnd; i++) {
|
||||
GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
|
||||
|
||||
|
|
@ -141,7 +134,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
for (int i = alnStart; i <= alnEnd; i++) {
|
||||
GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
|
|
@ -165,7 +158,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
byte[] quals = new byte[readLength];
|
||||
|
||||
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
|
||||
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
||||
quals[addLeft] = LOW_QUAL;
|
||||
read.setBaseQualities(quals);
|
||||
|
|
@ -252,7 +245,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
|
||||
|
||||
assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
|
||||
assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
|
||||
|
||||
if (leadingSoftClips > 0 || tailSoftClips > 0) {
|
||||
final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
|
||||
|
|
@ -265,6 +258,25 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testRevertSoftClippedBasesWithThreshold() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
|
||||
final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);
|
||||
|
||||
final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
|
||||
|
||||
assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed
|
||||
Assert.assertNull(read.getCigar().isValid(null, -1));
|
||||
Assert.assertNull(unclipped.getCigar().isValid(null, -1));
|
||||
|
||||
if (!(leadingSoftClips > 0 || tailSoftClips > 0))
|
||||
Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||
if (!read.isEmpty()) {
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap;
|
|||
import org.broad.tribble.annotation.Strand;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -38,9 +39,9 @@ import java.io.IOException;
|
|||
/**
|
||||
* Unit tests for the HapMap codec
|
||||
*/
|
||||
public class HapMapUnitTest {
|
||||
public class HapMapUnitTest extends BaseTest {
|
||||
// our sample hapmap file
|
||||
private final static File hapMapFile = new File("public/testdata/genotypes_chr1_ASW_phase3.3_first500.hapmap");
|
||||
private final static File hapMapFile = new File(privateTestDir + "genotypes_chr1_ASW_phase3.3_first500.hapmap");
|
||||
private final static String knownLine = "rs2185539 C/T chr1 556738 + ncbi_b36 bbs urn:lsid:bbs.hapmap.org:Protocol:Phase3.r3:1 urn:lsid:bbs.hapmap.org:Assay:Phase3.r3_r" +
|
||||
"s2185539:1 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:4 QC+ CC TC TT CT CC CC CC CC CC CC CC CC CC";
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
|
|
@ -19,14 +20,15 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
|
||||
* tests out the various functions in the index factory class
|
||||
*/
|
||||
public class IndexFactoryUnitTest extends BaseTest {
|
||||
|
||||
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
|
||||
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
|
||||
File inputFile = new File(privateTestDir + "HiSeq.10000.vcf");
|
||||
File outputFile = new File(privateTestDir + "onTheFlyOutputTest.vcf");
|
||||
File outputFileIndex = Tribble.indexFile(outputFile);
|
||||
|
||||
private SAMSequenceDictionary dict;
|
||||
|
|
@ -56,7 +58,8 @@ public class IndexFactoryUnitTest extends BaseTest {
|
|||
AbstractFeatureReader<VariantContext> source = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), new VCFCodec(), indexFromInputFile);
|
||||
|
||||
int counter = 0;
|
||||
VariantContextWriter writer = VariantContextWriterFactory.create(outputFile, dict);
|
||||
final EnumSet<Options> options = EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
|
||||
VariantContextWriter writer = VariantContextWriterFactory.create(outputFile, dict, options);
|
||||
writer.writeHeader((VCFHeader)source.getHeader());
|
||||
CloseableTribbleIterator<VariantContext> it = source.iterator();
|
||||
while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
public void testReadingAndWritingWitHNoChanges() {
|
||||
|
||||
String md5ofInputVCF = "babf02baabcfa7f72a2c6f7da5fdc996";
|
||||
String testVCF = testDir + "vcf4.1.example.vcf";
|
||||
String testVCF = privateTestDir + "vcf4.1.example.vcf";
|
||||
|
||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
|
|
@ -26,27 +26,36 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
executeTest("Test Variants To VCF from new output", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false)
|
||||
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
||||
public void testReadingAndWritingBreakpointAlleles() {
|
||||
String testVCF = testDir + "breakpoint-example.vcf";
|
||||
String testVCF = privateTestDir + "breakpoint-example.vcf";
|
||||
//String testVCF = validationDataLocation + "multiallelic.vcf";
|
||||
|
||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("d2604faad0613932453395c54cc68369"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("355b029487c3b4c499140d71310ca37e"));
|
||||
executeTest("Test reading and writing breakpoint VCF", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadingAndWritingSamtools() {
|
||||
String testVCF = testDir + "samtools.vcf";
|
||||
String testVCF = privateTestDir + "samtools.vcf";
|
||||
|
||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0807ff11bebba81b87a273ad6bee01a8"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0f82ac11852e7f958c1a0ce52398c2ae"));
|
||||
executeTest("Test reading and writing samtools vcf", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadingAndWritingSamtoolsWExBCFExample() {
|
||||
String testVCF = privateTestDir + "ex2.vcf";
|
||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("9773d6a121cfcb18d090965bc520f120"));
|
||||
executeTest("Test reading and writing samtools WEx vcf/BCF example", spec1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaron
|
||||
* Date: Jun 30, 2010
|
||||
* Time: 3:32:08 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class VCFStandardHeaderLinesUnitTest extends BaseTest {
|
||||
@DataProvider(name = "getStandardLines")
|
||||
public Object[][] makeGetStandardLines() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
// info
|
||||
tests.add(new Object[]{"AC", "info", true});
|
||||
tests.add(new Object[]{"AN", "info", true});
|
||||
tests.add(new Object[]{"AF", "info", true});
|
||||
tests.add(new Object[]{"DP", "info", true});
|
||||
tests.add(new Object[]{"DB", "info", true});
|
||||
tests.add(new Object[]{"END", "info", true});
|
||||
|
||||
// format
|
||||
tests.add(new Object[]{"GT", "format", true});
|
||||
tests.add(new Object[]{"GQ", "format", true});
|
||||
tests.add(new Object[]{"DP", "format", true});
|
||||
tests.add(new Object[]{"AD", "format", true});
|
||||
tests.add(new Object[]{"PL", "format", true});
|
||||
|
||||
tests.add(new Object[]{"NOT_STANDARD", "info", false});
|
||||
tests.add(new Object[]{"NOT_STANDARD", "format", false});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "getStandardLines")
|
||||
public void getStandardLines(final String key, final String type, final boolean expectedToBeStandard) {
|
||||
VCFCompoundHeaderLine line = null;
|
||||
if ( type.equals("info") )
|
||||
line = VCFStandardHeaderLines.getInfoLine(key, false);
|
||||
else if ( type.equals("format") )
|
||||
line = VCFStandardHeaderLines.getFormatLine(key, false);
|
||||
else
|
||||
throw new IllegalArgumentException("Unexpected type in getStandardLines " + type);
|
||||
|
||||
if ( expectedToBeStandard ) {
|
||||
Assert.assertNotNull(line);
|
||||
Assert.assertEquals(line.getID(), key);
|
||||
} else
|
||||
Assert.assertNull(line);
|
||||
}
|
||||
|
||||
private class RepairHeaderTest extends TestDataProvider {
|
||||
final VCFCompoundHeaderLine original, expectedResult;
|
||||
|
||||
private RepairHeaderTest(final VCFCompoundHeaderLine original) {
|
||||
this(original, original);
|
||||
}
|
||||
|
||||
private RepairHeaderTest(final VCFCompoundHeaderLine original, final VCFCompoundHeaderLine expectedResult) {
|
||||
super(RepairHeaderTest.class);
|
||||
this.original = original;
|
||||
this.expectedResult = expectedResult;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "RepairHeaderTest")
|
||||
public Object[][] makeRepairHeaderTest() {
|
||||
final VCFInfoHeaderLine standardAC = VCFStandardHeaderLines.getInfoLine("AC");
|
||||
final VCFInfoHeaderLine goodAC = new VCFInfoHeaderLine("AC", VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "x");
|
||||
|
||||
final VCFFormatHeaderLine standardGT = VCFStandardHeaderLines.getFormatLine("GT");
|
||||
final VCFFormatHeaderLine goodGT = new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "x");
|
||||
|
||||
new RepairHeaderTest( standardGT, standardGT);
|
||||
new RepairHeaderTest( goodGT, goodGT );
|
||||
new RepairHeaderTest( new VCFFormatHeaderLine("GT", 2, VCFHeaderLineType.String, "x"), standardGT);
|
||||
new RepairHeaderTest( new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.Integer, "x"), standardGT);
|
||||
new RepairHeaderTest( new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.Float, "x"), standardGT);
|
||||
new RepairHeaderTest( new VCFFormatHeaderLine("GT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "x"), standardGT);
|
||||
new RepairHeaderTest( new VCFFormatHeaderLine("GT", VCFHeaderLineCount.G, VCFHeaderLineType.String, "x"), standardGT);
|
||||
new RepairHeaderTest( new VCFFormatHeaderLine("GT", VCFHeaderLineCount.A, VCFHeaderLineType.String, "x"), standardGT);
|
||||
|
||||
new RepairHeaderTest( standardAC, standardAC);
|
||||
new RepairHeaderTest( goodAC, goodAC );
|
||||
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 1, VCFHeaderLineType.Integer, "x"), standardAC);
|
||||
new RepairHeaderTest( new VCFInfoHeaderLine("AC", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "x"), standardAC);
|
||||
new RepairHeaderTest( new VCFInfoHeaderLine("AC", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"), standardAC);
|
||||
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 1, VCFHeaderLineType.Float, "x"), standardAC);
|
||||
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 1, VCFHeaderLineType.String, "x"), standardAC);
|
||||
new RepairHeaderTest( new VCFInfoHeaderLine("AC", 0, VCFHeaderLineType.Flag, "x"), standardAC);
|
||||
|
||||
new RepairHeaderTest( new VCFInfoHeaderLine("NON_STANDARD_INFO", 1, VCFHeaderLineType.String, "x"));
|
||||
new RepairHeaderTest( new VCFFormatHeaderLine("NON_STANDARD_FORMAT", 1, VCFHeaderLineType.String, "x"));
|
||||
|
||||
return RepairHeaderTest.getTests(RepairHeaderTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "RepairHeaderTest")
|
||||
public void testRepairHeaderTest(RepairHeaderTest cfg) {
|
||||
final VCFHeader toRepair = new VCFHeader(Collections.singleton((VCFHeaderLine)cfg.original));
|
||||
final VCFHeader repaired = VCFStandardHeaderLines.repairStandardHeaderLines(toRepair);
|
||||
|
||||
VCFCompoundHeaderLine repairedLine = (VCFCompoundHeaderLine)repaired.getFormatHeaderLine(cfg.original.getID());
|
||||
if ( repairedLine == null ) repairedLine = (VCFCompoundHeaderLine)repaired.getInfoHeaderLine(cfg.original.getID());
|
||||
|
||||
Assert.assertNotNull(repairedLine, "Repaired header didn't contain the expected line");
|
||||
Assert.assertEquals(repairedLine.getID(), cfg.expectedResult.getID());
|
||||
Assert.assertEquals(repairedLine.getType(), cfg.expectedResult.getType());
|
||||
Assert.assertEquals(repairedLine.getCountType(), cfg.expectedResult.getCountType());
|
||||
if ( repairedLine.getCountType() == VCFHeaderLineCount.INTEGER )
|
||||
Assert.assertEquals(repairedLine.getCount(), cfg.expectedResult.getCount());
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue