From fba7dafa0efdfc2be83bd39b797aecf1793a854c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 19 Jun 2012 09:46:26 -0400 Subject: [PATCH] Finalizing BCF2 mark III commit -- Moved GENOTYPE_KEY vcf header line to VCFConstants. This general migration and cleanup is on Eric's plate now -- Updated HC to initialize the annotation engine in an order that allows it to write a proper VCF header. Still doesn't work... -- Updating integration test files. Moved many more files into public/testdata. Updated their headers to all work correctly with new strict VCF header checking. -- Bugfix for TandemRepeatAnnotation that must be unbounded not A count type as it provides info for the REF as well as each alt -- No longer add FALSE values to flag values in VCs in VariantAnnotatorEngine. DB = 0 is never seen in the output VCFs now -- Fixed bug in VCFDiffableReader that didn't differentiate between "." and "PASS" VC filter status -- Unconditionally add lowQual Filter to UG output VCF files as this is in some cases (EMIT_ALL_SITES) used when the previous check said it wouldn't be -- VariantsToVCF now properly writes out the GT FORMAT field -- BCF2 codec explodes when reading symbolic alleles as I literally cannot figure out how to use the allele clipping code. Eric said he and Ami will clean up this whole piece of infrastructure -- Fixed bug in BCF2Codec that wasn't setting the phase field correctly. UnitTested now -- PASS string now added at the end of the BCF2 dictionary after discussion with Heng -- Fixed bug where I was writing out all field values as BigEndian. Now everything is LittleEndian. 
-- VCFHeader detects the case where a count field has size < 0 (some of our files have count = -1) and throws a UserException -- Cleaned up unused code -- Fixed bug in BCF2 string encoder that wasn't handling the case of an empty list of strings for encoding -- Fixed bug where all samples are no-called in a VC, in which case we (like the VCFwriter) write out no-called diploid genotypes for all samples -- We always write the number of genotype samples into the BCF2 nSamples header. How we can have a variable number of samples per record isn't clear to me, as we don't have a map from missing samples to header names... -- Removed old filtersWereAppliedToContext code in VCF as we now properly handle unfiltered, filtered, and PASS records internally -- Fastpath function getDisplayBases() in allele that just gives you the raw byte[] you'd see for an Allele -- Genotype fields no longer differentiate between unfiltered, filtered, and PASS values. Genotype objects are all PASS implicitly, or explicitly filtered. We only write out the FT values if at least one sample is filtered. Removed interface functions and cleaned up code -- Refactored padAllele code from createVariantContextWithPaddedAlleles into the function padAllele so that it actually works. In general, **** NEVER COPY CODE **** if you need to share functionality make a function, that's why they were invented! 
-- Increased the default number of records to read for DiffObjects to 1M --- .../annotator/TandemRepeatAnnotator.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 9 +-- .../walkers/diffengine/VCFDiffableReader.java | 4 +- .../walkers/genotyper/UnifiedGenotyper.java | 8 +-- .../indels/SomaticIndelDetectorWalker.java | 2 +- .../varianteval/util/VariantEvalUtils.java | 10 +-- .../variantutils/LiftoverVariants.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 1 + .../sting/utils/codecs/bcf2/BCF2Codec.java | 19 +++--- .../bcf2/BCF2GenotypeFieldDecoders.java | 5 ++ .../sting/utils/codecs/bcf2/BCF2Utils.java | 66 ++++++++++-------- .../codecs/vcf/VCFCompoundHeaderLine.java | 5 ++ .../sting/utils/codecs/vcf/VCFConstants.java | 5 ++ .../sting/utils/variantcontext/Allele.java | 9 +++ .../utils/variantcontext/FastGenotype.java | 5 -- .../sting/utils/variantcontext/Genotype.java | 5 +- .../utils/variantcontext/GenotypeBuilder.java | 3 +- .../utils/variantcontext/SlowGenotype.java | 1 - .../utils/variantcontext/VariantContext.java | 45 ++++++++++--- .../variantcontext/VariantContextUtils.java | 63 ++++++----------- .../writer/BCF2FieldEncoder.java | 25 ++----- .../writer/BCF2FieldWriter.java | 7 +- .../variantcontext/writer/BCF2Writer.java | 11 +-- .../variantcontext/writer/VCFWriter.java | 20 ++---- .../test/org/broadinstitute/sting/MD5DB.java | 2 +- ...ReferenceOrderedQueryDataPoolUnitTest.java | 2 +- .../tracks/FeatureManagerUnitTest.java | 2 +- .../FeatureToGATKFeatureIteratorUnitTest.java | 2 +- .../CNV/SymbolicAllelesIntegrationTest.java | 4 +- .../VariantAnnotatorIntegrationTest.java | 52 +++++++------- .../walkers/beagle/BeagleIntegrationTest.java | 13 ++-- .../VariantFiltrationIntegrationTest.java | 42 ++++++------ .../UnifiedGenotyperIntegrationTest.java | 67 ++++++++++--------- .../ReadBackedPhasingIntegrationTest.java | 14 ++-- .../RecalibrationWalkersIntegrationTest.java | 2 +- ...ntRecalibrationWalkersIntegrationTest.java | 2 +- 
.../CombineVariantsIntegrationTest.java | 6 +- .../LiftoverVariantsIntegrationTest.java | 6 +- .../SelectVariantsIntegrationTest.java | 2 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 2 +- .../VariantContextTestProvider.java | 32 ++++++++- 41 files changed, 317 insertions(+), 267 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index c5ad435b3..c4d6ea474 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -71,7 +71,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa public static final VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"), new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"), - new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") }; + new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") }; public List getKeyNames() { return Arrays.asList(keyNames); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 4e9e0afce..b3d9a881c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -217,11 +217,11 @@ public class 
VariantAnnotatorEngine { if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) { final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType()); - // put the DB key into the INFO field - infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null); - // add the ID if appropriate if ( rsID != null ) { + // put the DB key into the INFO field + infoAnnotations.put(VCFConstants.DBSNP_KEY, true); + if ( vc.emptyID() ) { vc = new VariantContextBuilder(vc).id(rsID).make(); } else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) { @@ -237,7 +237,8 @@ public class VariantAnnotatorEngine { break; } } - infoAnnotations.put(dbSet.getValue(), overlapsComp); + if ( overlapsComp ) + infoAnnotations.put(dbSet.getValue(), overlapsComp); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index f06bca904..7c03929ae 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -97,7 +97,9 @@ public class VCFDiffableReader implements DiffableReader { vcRoot.add("REF", vc.getReference()); vcRoot.add("ALT", vc.getAlternateAlleles()); vcRoot.add("QUAL", vc.hasLog10PError() ? vc.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4); - vcRoot.add("FILTER", vc.getFilters()); + vcRoot.add("FILTER", ! vc.filtersWereApplied() // needs null to differentiate between PASS and . + ? VCFConstants.MISSING_VALUE_v4 + : ( vc.getFilters().isEmpty() ? 
VCFConstants.PASSES_FILTERS_v4 : vc.getFilters()) ); // add info fields for (Map.Entry attribute : vc.getAttributes().entrySet()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 128322bfc..7d69dcd48 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -272,9 +272,9 @@ public class UnifiedGenotyper extends LocusWalker, Unif // FORMAT fields headerInfo.addAll(getSupportedHeaderStrings()); - // FILTER fields - if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ) - headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality")); + // FILTER fields are added unconditionally as it's not always 100% certain the circumstances + // where the filters are used. 
For example, in emitting all sites the lowQual field is used + headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality")); return headerInfo; } @@ -285,7 +285,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif */ private static Set getSupportedHeaderStrings() { Set result = new HashSet(); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); + result.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE); result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality")); result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)")); result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index e220b1f8d..4f080e90b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -316,7 +316,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { // first, the basic info headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector")); headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); - headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); + headerInfo.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE); // FORMAT and INFO fields // headerInfo.addAll(VCFUtils.getSupportedHeaderStrings()); diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 39033bfed..3b28747fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -204,16 +204,16 @@ public class VariantEvalUtils { final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount(); final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount(); final boolean isSingleton = originalAlleleCount == newAlleleCount && newAlleleCount == 1; - final boolean hasChrCountAnnotations = vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && - vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) && - vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY); + final boolean hasChrCountAnnotations = vcsub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && + vcsub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) && + vcsub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY); if ( ! 
isSingleton && hasChrCountAnnotations ) { // nothing to update - return vc; + return vcsub; } else { // have to do the work - VariantContextBuilder builder = new VariantContextBuilder(vc); + VariantContextBuilder builder = new VariantContextBuilder(vcsub); if ( isSingleton ) builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 9a2a06ea4..a3e1c0bd7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -129,7 +129,7 @@ public class LiftoverVariants extends RodWalker { .attribute("OriginalStart", fromInterval.getStart()).make(); } - VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false); + VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc); if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(), diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 6ddfde190..215158152 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -222,6 +222,7 @@ public class VariantsToVCF extends RodWalker { //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); //hInfo.add(new VCFHeaderLine("reference", 
getToolkit().getArguments().referenceFile.getID())); + hInfo.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); for ( VCFHeaderLine field : hInfo ) { if ( field instanceof VCFFormatHeaderLine) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index ab20299de..91331ac13 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -36,6 +36,7 @@ import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; @@ -334,7 +335,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD */ protected static ArrayList clipAllelesIfNecessary(int position, String ref, ArrayList unclippedAlleles) { if ( ! 
AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) { - ArrayList clippedAlleles = new ArrayList(unclippedAlleles.size()); + final ArrayList clippedAlleles = new ArrayList(unclippedAlleles.size()); AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1); return clippedAlleles; } else @@ -355,14 +356,16 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD String ref = null; for ( int i = 0; i < nAlleles; i++ ) { - final String allele = (String)decoder.decodeTypedValue(); + final String alleleBases = (String)decoder.decodeTypedValue(); - if ( i == 0 ) { - ref = allele; - alleles.add(Allele.create(allele, true)); - } else { - alleles.add(Allele.create(allele, false)); - } + final boolean isRef = i == 0; + final Allele allele = Allele.create(alleleBases, isRef); + if ( isRef ) ref = alleleBases; + + alleles.add(allele); + + if ( allele.isSymbolic() ) + throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles"); } assert ref != null; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java index 0c737c9a2..36ecb198b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java @@ -169,6 +169,9 @@ public class BCF2GenotypeFieldDecoders { gb.alleles(gt); } + + final boolean phased = (a1 & 0x01) == 1; + gb.phased(phased); } } @@ -199,6 +202,8 @@ public class BCF2GenotypeFieldDecoders { gt.add(getAlleleFromEncoded(siteAlleles, encode)); gb.alleles(gt); + final boolean phased = (encoded[0] & 0x01) == 1; + gb.phased(phased); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 
761c369ac..8e8ce7480 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -82,18 +82,27 @@ public final class BCF2Utils { @Requires("header != null") @Ensures({"result != null", "new HashSet(result).size() == result.size()"}) public final static ArrayList makeDictionary(final VCFHeader header) { - final Set dict = new TreeSet(); + final Set seen = new HashSet(); + final ArrayList dict = new ArrayList(); + boolean sawPASS = false; // set up the strings dictionary - dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field for ( VCFHeaderLine line : header.getMetaData() ) { if ( line instanceof VCFIDHeaderLine) { - VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; - dict.add(idLine.getID()); + final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; + if ( ! seen.contains(idLine.getID())) { + sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4); + dict.add(idLine.getID()); + seen.add(idLine.getID()); + } } } - return new ArrayList(dict); + + if ( ! 
sawPASS ) + dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field + + return dict; } @Requires({"nElements >= 0", "type != null"}) @@ -142,25 +151,6 @@ public final class BCF2Utils { } } - @Requires({"stream != null", "bytesForEachInt > 0"}) - public final static int readInt(int bytesForEachInt, final InputStream stream) { - switch ( bytesForEachInt ) { - case 1: { - return (byte)(readByte(stream)); - } case 2: { - final int b1 = readByte(stream) & 0xFF; - final int b2 = readByte(stream) & 0xFF; - return (short)((b1 << 8) | b2); - } case 4: { - final int b1 = readByte(stream) & 0xFF; - final int b2 = readByte(stream) & 0xFF; - final int b3 = readByte(stream) & 0xFF; - final int b4 = readByte(stream) & 0xFF; - return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4); - } default: throw new ReviewedStingException("Unexpected size during decoding"); - } - } - /** * Collapse multiple strings into a comma separated list * @@ -299,20 +289,40 @@ public final class BCF2Utils { else return Collections.singletonList(o); } + + @Requires({"stream != null", "bytesForEachInt > 0"}) + public final static int readInt(int bytesForEachInt, final InputStream stream) { + switch ( bytesForEachInt ) { + case 1: { + return (byte)(readByte(stream)); + } case 2: { + final int b2 = readByte(stream) & 0xFF; + final int b1 = readByte(stream) & 0xFF; + return (short)((b1 << 8) | b2); + } case 4: { + final int b4 = readByte(stream) & 0xFF; + final int b3 = readByte(stream) & 0xFF; + final int b2 = readByte(stream) & 0xFF; + final int b1 = readByte(stream) & 0xFF; + return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4); + } default: throw new ReviewedStingException("Unexpected size during decoding"); + } + } + public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException { switch ( type.getSizeInBytes() ) { case 1: encodeStream.write(0xFF & value); break; case 2: + encodeStream.write((0x00FF & value)); 
encodeStream.write((0xFF00 & value) >> 8); - encodeStream.write(0xFF & value); break; case 4: - encodeStream.write((0xFF000000 & value) >> 24); - encodeStream.write((0x00FF0000 & value) >> 16); - encodeStream.write((0x0000FF00 & value) >> 8); encodeStream.write((0x000000FF & value)); + encodeStream.write((0x0000FF00 & value) >> 8); + encodeStream.write((0x00FF0000 & value) >> 16); + encodeStream.write((0xFF000000 & value) >> 24); break; default: throw new ReviewedStingException("BUG: unexpected type size " + type); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index 239748325..97f3ecd0c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.apache.log4j.Logger; import org.broad.tribble.TribbleException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Arrays; import java.util.LinkedHashMap; @@ -154,6 +155,10 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF count = Integer.valueOf(numberStr); } + + if ( count < 0 && countType == VCFHeaderLineCount.INTEGER ) + throw new UserException.MalformedVCFHeader("Count < 0 for fixed size VCF header field " + name); + try { type = VCFHeaderLineType.valueOf(mapping.get("Type")); } catch (Exception e) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index 108630607..1814961f8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -117,4 +117,9 @@ public final class VCFConstants { public static final int MAX_GENOTYPE_QUAL = 99; public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare + + // + // VCF header line constants + // + public static final VCFFormatHeaderLine GENOTYPE_KEY_HEADER_LINE = new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index c1cd0068e..2e1770581 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -347,6 +347,15 @@ public class Allele implements Comparable { */ public String getDisplayString() { return new String(bases); } + /** + * Same as #getDisplayString() but returns the result as byte[]. 
+ * + * Slightly faster then getDisplayString() + * + * @return the allele string representation + */ + public byte[] getDisplayBases() { return bases; } + /** * @param other the other allele * diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java index b23d07cce..e1185ba70 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java @@ -156,11 +156,6 @@ public final class FastGenotype extends Genotype { return (List) getExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList()); } - @Override - public boolean filtersWereApplied() { - return hasExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY); - } - @Override public int[] getPL() { return PL; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 06f9606e3..cffa5f528 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -451,7 +451,7 @@ public abstract class Genotype implements Comparable { * * @return */ - @Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"}) + @Ensures({"result != null"}) public abstract List getFilters(); @Ensures({"result != getFilters().isEmpty()"}) @@ -459,9 +459,6 @@ public abstract class Genotype implements Comparable { return ! 
getFilters().isEmpty(); } - @Ensures("result == true || getFilters().isEmpty()") - public abstract boolean filtersWereApplied(); - @Deprecated public boolean hasLog10PError() { return hasGQ(); } @Deprecated public double getLog10PError() { return getGQ() / -10.0; } @Deprecated public int getPhredScaledQual() { return getGQ(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index 160f96056..ac0c503f7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -383,7 +383,8 @@ public final class GenotypeBuilder { */ @Requires("filters != null") public GenotypeBuilder filters(final List filters) { - attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters); + if ( ! filters.isEmpty() ) + attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters); return this; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java index e5dd1451a..ded7c63bd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java @@ -113,7 +113,6 @@ public class SlowGenotype extends Genotype { // // --------------------------------------------------------------------------------------------------------- @Override public List getFilters() { return new ArrayList(commonInfo.getFilters()); } - @Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); } @Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); } @Override public double getLog10PError() { return commonInfo.getLog10PError(); } diff --git 
a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 6739f1d0e..2f9074ded 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -339,7 +339,7 @@ public class VariantContext implements Feature { // to enable tribble integratio * @return */ public VariantContext subContextFromSamples(Set sampleNames, final boolean rederiveAllelesFromGenotypes ) { - if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) { + if ( sampleNames.containsAll(getSampleNames()) ) { return this; // fast path when you don't have any work to do } else { VariantContextBuilder builder = new VariantContextBuilder(this); @@ -559,7 +559,7 @@ public class VariantContext implements Feature { // to enable tribble integratio public String getAlleleStringWithRefPadding(final Allele allele) { if ( VariantContextUtils.needsPadding(this) ) - return VariantContextUtils.padAllele(this, allele); + return VariantContextUtils.padAllele(this, allele).getDisplayString(); else return allele.getDisplayString(); } @@ -1177,8 +1177,9 @@ public class VariantContext implements Feature { // to enable tribble integratio // if ( getType() == Type.INDEL ) { // if ( getReference().length() != (getLocation().size()-1) ) { long length = (stop - start) + 1; - if ( (getReference().isNull() && length != 1 ) || - (!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) { + if ( ! 
isSymbolic() + && ((getReference().isNull() && length != 1 ) + || (getReference().isNonNull() && (length - getReference().length() > 1)))) { throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); } } @@ -1358,19 +1359,38 @@ public class VariantContext implements Feature { // to enable tribble integratio } private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header) { - builder.attributes(fullyDecodeAttributes(getAttributes(), header)); + builder.attributes(fullyDecodeAttributes(getAttributes(), header, false)); } - private final Map fullyDecodeAttributes(final Map attributes, final VCFHeader header) { + private final Map fullyDecodeAttributes(final Map attributes, + final VCFHeader header, + final boolean allowMissingValuesComparedToHeader) { final Map newAttributes = new HashMap(attributes.size()); for ( final Map.Entry attr : attributes.entrySet() ) { final String field = attr.getKey(); + + if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) + continue; // gross, FT is part of the extended attributes + final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field); final Object decoded = decodeValue(field, attr.getValue(), format); - if ( decoded != null ) + if ( decoded != null ) { + if ( ! allowMissingValuesComparedToHeader + && format.getCountType() != VCFHeaderLineCount.UNBOUNDED + && format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements + final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1; + final int expSize = format.getCount(this.getNAlleles() - 1); + if ( obsSize != expSize ) { + throw new UserException.MalformedVCFHeader("Discordant field size detected for field " + + field + " at " + getChr() + ":" + getStart() + ". 
Field had " + obsSize + " values " + + "but the header says this should have " + expSize + " values based on header record " + + format); + } + } newAttributes.put(field, decoded); + } } return newAttributes; @@ -1400,6 +1420,8 @@ public class VariantContext implements Feature { // to enable tribble integratio } else { return value; } + + // allowMissingValuesComparedToHeader } private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) { @@ -1409,7 +1431,12 @@ public class VariantContext implements Feature { // to enable tribble integratio else { switch ( format.getType() ) { case Character: return string; - case Flag: return Boolean.valueOf(string); + case Flag: + final boolean b = Boolean.valueOf(string); + if ( b == false ) + throw new UserException.MalformedVCF("VariantContext FLAG fields " + field + " cannot contain false values" + + " as seen at " + getChr() + ":" + getStart()); + return b; case String: return string; case Integer: return Integer.valueOf(string); case Float: return Double.valueOf(string); @@ -1430,7 +1457,7 @@ public class VariantContext implements Feature { // to enable tribble integratio } private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) { - final Map map = fullyDecodeAttributes(g.getExtendedAttributes(), header); + final Map map = fullyDecodeAttributes(g.getExtendedAttributes(), header, true); return new GenotypeBuilder(g).attributes(map).make(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 9276046dc..099c3d541 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -182,17 +182,23 @@ public class VariantContextUtils { return false; } - public static String 
padAllele(final VariantContext vc, final Allele allele) { + public static Allele padAllele(final VariantContext vc, final Allele allele) { assert needsPadding(vc); - StringBuilder sb = new StringBuilder(); - sb.append((char)vc.getReferenceBaseForIndel().byteValue()); - sb.append(allele.getDisplayString()); - return sb.toString(); + if ( allele.isSymbolic() ) + return allele; + else { + // get bases for current allele and create a new one with trimmed bases + final StringBuilder sb = new StringBuilder(); + sb.append((char)vc.getReferenceBaseForIndel().byteValue()); + sb.append(allele.getDisplayString()); + final String newBases = sb.toString(); + return Allele.create(newBases, allele.isReference()); + } } - public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { + public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) { final boolean padVC = needsPadding(inputVC); // nothing to do if we don't need to pad bases @@ -200,46 +206,21 @@ public class VariantContextUtils { if ( !inputVC.hasReferenceBaseForIndel() ) throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); - Byte refByte = inputVC.getReferenceBaseForIndel(); + final ArrayList alleles = new ArrayList(inputVC.getNAlleles()); + final Map unpaddedToPadded = new HashMap(inputVC.getNAlleles()); - List alleles = new ArrayList(); - - for (Allele a : inputVC.getAlleles()) { - // get bases for current allele and create a new one with trimmed bases - if (a.isSymbolic()) { - alleles.add(a); - } else { - String newBases; - if ( refBaseShouldBeAppliedToEndOfAlleles ) - newBases = a.getBaseString() + new String(new byte[]{refByte}); - else - newBases = new String(new byte[]{refByte}) + a.getBaseString(); - alleles.add(Allele.create(newBases,a.isReference())); - } + for (final Allele a : 
inputVC.getAlleles()) { + final Allele padded = padAllele(inputVC, a); + alleles.add(padded); + unpaddedToPadded.put(a, padded); } // now we can recreate new genotypes with trimmed alleles GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples()); for (final Genotype g : inputVC.getGenotypes() ) { - List inAlleles = g.getAlleles(); - List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); - for (Allele a : inAlleles) { - if (a.isCalled()) { - if (a.isSymbolic()) { - newGenotypeAlleles.add(a); - } else { - String newBases; - if ( refBaseShouldBeAppliedToEndOfAlleles ) - newBases = a.getBaseString() + new String(new byte[]{refByte}); - else - newBases = new String(new byte[]{refByte}) + a.getBaseString(); - newGenotypeAlleles.add(Allele.create(newBases,a.isReference())); - } - } - else { - // add no-call allele - newGenotypeAlleles.add(Allele.NO_CALL); - } + final List newGenotypeAlleles = new ArrayList(g.getAlleles().size()); + for (final Allele a : g.getAlleles()) { + newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL); } genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make()); @@ -556,7 +537,7 @@ public class VariantContextUtils { for (final VariantContext vc : prepaddedVCs) { // also a reasonable place to remove filtered calls, if needed if ( ! 
filteredAreUncalled || vc.isNotFiltered() ) - VCs.add(createVariantContextWithPaddedAlleles(vc, false)); + VCs.add(createVariantContextWithPaddedAlleles(vc)); } if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java index 2ff32964f..28fb8a602 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java @@ -269,21 +269,6 @@ public abstract class BCF2FieldEncoder { // // ---------------------------------------------------------------------- - /** - * Convenience method that just called encodeValue with a no minimum for the number of values. - * - * Primarily useful for encoding site values - * - * @param encoder - * @param value - * @param type - * @throws IOException - */ - @Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"}) - public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException { - encodeValue(encoder, value, type, 0); - } - /** * Key abstract method that should encode a value of the given type into the encoder. 
* @@ -348,10 +333,10 @@ public abstract class BCF2FieldEncoder { if ( value == null ) return ""; else if (value instanceof List) { - if ( ((List) value).size() == 1 ) - return (String)((List) value).get(0); - else - return BCF2Utils.collapseStringList((List)value); + final List l = (List)value; + if ( l.isEmpty() ) return ""; + else if ( l.size() == 1 ) return (String)l.get(0); + else return BCF2Utils.collapseStringList(l); } else return (String)value; } @@ -376,7 +361,7 @@ public abstract class BCF2FieldEncoder { } @Override - @Requires("minValues <= 1") + @Requires({"minValues <= 1", "value != null", "value instanceof Boolean", "((Boolean)value) == true"}) public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException { encoder.encodeRawBytes(1, getStaticType()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java index 0a54bc5d0..9d0adeed8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java @@ -100,7 +100,7 @@ public abstract class BCF2FieldWriter { } else { final int valueCount = getFieldEncoder().numElements(vc, rawValue); encoder.encodeType(valueCount, type); - getFieldEncoder().encodeOneValue(encoder, rawValue, type); + getFieldEncoder().encodeValue(encoder, rawValue, type, valueCount); } } } @@ -246,6 +246,10 @@ public abstract class BCF2FieldWriter { buildAlleleMap(vc); nValuesPerGenotype = vc.getMaxPloidy(); + // deal with the case where we have no call everywhere, in which case we write out diploid + if ( nValuesPerGenotype == -1 ) + nValuesPerGenotype = 2; + super.start(encoder, vc); } @@ -298,7 +302,6 @@ public abstract class BCF2FieldWriter { if ( nAlleles > 2 ) { // for multi-allelics we need to 
clear the map, and add additional looks alleleMapForTriPlus.clear(); - alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup final List alleles = vc.getAlleles(); for ( int i = 2; i < alleles.size(); i++ ) { alleleMapForTriPlus.put(alleles.get(i), i); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 37c8e83ff..92d99c609 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -84,6 +84,7 @@ import java.util.*; */ class BCF2Writer extends IndexingVariantContextWriter { final protected static Logger logger = Logger.getLogger(BCF2Writer.class); + final private static List MISSING_GENOTYPE = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support private VCFHeader header; @@ -213,7 +214,7 @@ class BCF2Writer extends IndexingVariantContextWriter { final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); - final int nSamples = vc.getNSamples(); + final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); @@ -256,10 +257,10 @@ class BCF2Writer extends IndexingVariantContextWriter { private void buildAlleles( VariantContext vc ) throws IOException { final boolean needsPadding = VariantContextUtils.needsPadding(vc); - for ( final Allele allele : vc.getAlleles() ) { - byte[] s = allele.getBases(); + for ( Allele allele : vc.getAlleles() ) { if ( needsPadding ) - s = 
VariantContextUtils.padAllele(vc,allele).getBytes(); + allele = VariantContextUtils.padAllele(vc,allele); + final byte[] s = allele.getDisplayBases(); encoder.encodeTypedString(s); } } @@ -298,7 +299,7 @@ class BCF2Writer extends IndexingVariantContextWriter { Genotype g = vc.getGenotype(name); if ( g == null ) // we don't have any data about g at all - g = new GenotypeBuilder(name).make(); + g = new GenotypeBuilder(name).alleles(MISSING_GENOTYPE).make(); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 69649aca7..2e5d984d0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -51,9 +51,6 @@ class VCFWriter extends IndexingVariantContextWriter { // the VCF header we're storing protected VCFHeader mHeader = null; - // were filters applied? 
- protected boolean filtersWereAppliedToContext = false; - final private boolean allowMissingFieldsInHeader; private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors(); @@ -78,13 +75,6 @@ class VCFWriter extends IndexingVariantContextWriter { // note we need to update the mHeader object after this call because they header // may have genotypes trimmed out of it, if doNotWriteGenotypes is true mHeader = writeHeader(header, mWriter, doNotWriteGenotypes, getVersionLine(), getStreamName()); - - // determine if we use filters, so we should FORCE pass the records - // TODO -- this might not be necessary any longer as we have unfiltered, filtered, and PASS VCs - for ( final VCFHeaderLine line : header.getMetaData() ) { - if ( line instanceof VCFFilterHeaderLine) - filtersWereAppliedToContext = true; - } } public static final String getVersionLine() { @@ -171,7 +161,7 @@ class VCFWriter extends IndexingVariantContextWriter { vc = new VariantContextBuilder(vc).noGenotypes().make(); try { - vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false); + vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc); super.add(vc); Map alleleMap = buildAlleleMap(vc); @@ -219,7 +209,7 @@ class VCFWriter extends IndexingVariantContextWriter { mWriter.write(VCFConstants.FIELD_SEPARATOR); // FILTER - String filters = getFilterString(vc, filtersWereAppliedToContext); + String filters = getFilterString(vc); mWriter.write(filters); mWriter.write(VCFConstants.FIELD_SEPARATOR); @@ -283,7 +273,7 @@ class VCFWriter extends IndexingVariantContextWriter { // // -------------------------------------------------------------------------------- - private final String getFilterString(final VariantContext vc, boolean forcePASS) { + private final String getFilterString(final VariantContext vc) { if ( vc.isFiltered() ) { for ( final String filter : vc.getFilters() ) if ( ! 
mHeader.hasFilterLine(filter) ) @@ -291,7 +281,7 @@ class VCFWriter extends IndexingVariantContextWriter { return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())); } - else if ( forcePASS || vc.filtersWereApplied() ) + else if ( vc.filtersWereApplied() ) return VCFConstants.PASSES_FILTERS_v4; else return VCFConstants.UNFILTERED; @@ -407,7 +397,7 @@ class VCFWriter extends IndexingVariantContextWriter { // some exceptions if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) { - val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); + val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4; } VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field); diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java index 85780b569..22388f3a2 100644 --- a/public/java/test/org/broadinstitute/sting/MD5DB.java +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -48,7 +48,7 @@ public class MD5DB { /** * Subdirectory under the ant build directory where we store integration test md5 results */ - private static final int MAX_RECORDS_TO_READ = 100000; + private static final int MAX_RECORDS_TO_READ = 1000000; private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = -1; public static final String LOCAL_MD5_DB_DIR = "integrationtests"; public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedQueryDataPoolUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedQueryDataPoolUnitTest.java index 6773b2bff..c481a00fe 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedQueryDataPoolUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedQueryDataPoolUnitTest.java @@ -43,7 +43,7 @@ public class ReferenceOrderedQueryDataPoolUnitTest extends BaseTest{ @Test public void testCloseFilePointers() throws IOException { // Build up query parameters - File file = new File(BaseTest.validationDataLocation + "NA12878.hg19.example1.vcf"); + File file = new File(BaseTest.testDir + "NA12878.hg19.example1.vcf"); RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags()); IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference)); GenomeLocParser parser = new GenomeLocParser(seq); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index 513b34ebb..1a3a86313 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -53,7 +53,7 @@ import java.util.*; */ public class FeatureManagerUnitTest extends BaseTest { private static final File RANDOM_FILE = new File(testDir + "exampleGATKReport.eval"); - private static final File VCF3_FILE = new File(testDir + "vcfexample3.vcf"); + private static final File VCF3_FILE = new File(testDir + "vcf3.vcf"); private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf"); private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz"); private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java index a5a0d24e9..55f70f16e 100644 --- 
a/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java @@ -44,7 +44,7 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest { final String chr = "20"; IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference)); GenomeLocParser parser = new GenomeLocParser(seq); - File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf"); + File file = new File(testDir + "NA12878.hg19.example1.vcf"); VCFCodec codec = new VCFCodec(); TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec); CheckableCloseableTribbleIterator tribbleIterator = reader.query(chr, 1, 100000); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java index 30647de1f..0ffdb9ffa 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/CNV/SymbolicAllelesIntegrationTest.java @@ -19,7 +19,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { } - @Test + @Test(enabled = false) public void test1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_1.vcf"), @@ -28,7 +28,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest { executeTest("Test symbolic alleles", spec); } - @Test + @Test(enabled = false) public void test2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java 
index 6cbd21824..6dc93e549 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -15,15 +15,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928")); + baseTestString() + " --variant " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + Arrays.asList("bd6848e7dbf2f809ee2f690ee2cf8ef4")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @Test public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " --variant " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("9914bd19f6235c550e5182e0f4591da6")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -31,15 +31,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("6a52ef10bb10d72cdd82a8f7afc2dd09")); + baseTestString() + " -G Standard --variant " + testDir + "vcfexample2.vcf -I " + 
validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + Arrays.asList("9084e6c7b1cec0f3a2c6d96711844d5e")); executeTest("test file has annotations, asking for annotations, #1", spec); } @Test public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " -G Standard --variant " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("74d894fd31b449deffca88d0e465f01b")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -47,8 +47,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("dd89dfa22f0e1d6760095e04f528d62a")); + baseTestString() + " --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + Arrays.asList("b85c1ea28194484b327fbe0add1b5685")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -57,32 +57,32 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { // the genotype annotations in this file are actually out of order. If you don't parse the genotypes // they don't get reordered. It's a good test of the genotype ordering system. 
WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("542d9ed8290ef7868387af4127e0b5fa")); + baseTestString() + " --variant " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + Arrays.asList("fe4d4e2484c4cf8b1cd50ad42cfe468e")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @Test public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("b1b32ed3b831c92c94258c8e4a60e8c9")); + baseTestString() + " -G Standard --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + Arrays.asList("043fc6205b0633edcd3fadc9e044800c")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @Test public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("a25eacb0ceea2c082af349f8d7776c8a")); + baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + Arrays.asList("6fafb42d374a67ba4687a23078a126af")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @Test public void testExcludeAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " 
-G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("ef046909a6f6c6cb43653a255a99a014")); + baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + Arrays.asList("639462a0e0fa79e33def5f011fe55961")); executeTest("test exclude annotations", spec); } @@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + testDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, - Arrays.asList("5c2fded3b6a96b0b0788086bbb2409ed")); + Arrays.asList("ebbf32f5b8b8d22f2eb247a0a3db3da0")); executeTest("test overwriting header", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1, - Arrays.asList("c590088d85edce786604fd600f5d5e75")); + Arrays.asList("afe6c9d3b4b80635a541cdfcfa48db2f")); executeTest("not passing it any reads", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1, - Arrays.asList("ade9354a4cdd6cc92c169f252fb36f3f")); + Arrays.asList("21d696ea8c55d2fd4cbb4dcd5f7f7db6")); executeTest("getting DB tag with dbSNP", spec); } @@ -114,7 +114,7 @@ public 
class VariantAnnotatorIntegrationTest extends WalkerTest { public void testMultipleIdsWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3withIDs.vcf -L " + testDir + "vcfexample3withIDs.vcf", 1, - Arrays.asList("f496f40e1e9efa743e3b473f6fe6e6d3")); + Arrays.asList("ef95394c14d5c16682a322f3dfb9000c")); executeTest("adding multiple IDs with dbSNP", spec); } @@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --comp:H3 " + testDir + "fakeHM3.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1, - Arrays.asList("d383fbd741d604625c9507d4da1c5a27")); + Arrays.asList("e6e276b7d517d57626c8409589cd286f")); executeTest("getting DB tag with HM3", spec); } @@ -130,23 +130,23 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoQuals() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --variant " + testDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + testDir + "noQual.vcf -A QualByDepth", 1, - Arrays.asList("4a247f039dfb16ac05b38a0dd5f98da6")); + Arrays.asList("a99e8315571ed1b6bce942451b3d8612")); executeTest("test file doesn't have QUALs", spec); } @Test public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1, - Arrays.asList("067792efcffea93ade632e52a80d0d8f")); + baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1, + 
Arrays.asList("7d6ea3b54210620cbc7e14dad8836bcb")); executeTest("using expression", spec); } @Test public void testUsingExpressionWithID() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1, - Arrays.asList("66c68deb0508348324eb47d524e756de")); + baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1, + Arrays.asList("35ce4fb0288dfc5c01ec6ce8b14c6157")); executeTest("using expression with ID", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index 5c84f4cdc..f10b9a960 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -41,7 +41,8 @@ public class BeagleIntegrationTest extends WalkerTest { "--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + "--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + "--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + - "-o %s --no_cmdline_in_header", 1, Arrays.asList("cdbf8cc557f5be9ac778e52338c0d906")); + "-o %s --no_cmdline_in_header --allowMissingVCFHeaders", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea")); + spec.disableShadowBCF(); executeTest("test BeagleOutputToVCF", spec); } @@ -50,7 +51,8 @@ public class BeagleIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ProduceBeagleInput -R " + hg19Reference + " " + "--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + - "-o %s", 1, 
Arrays.asList("f301b089d21da259873f04bdc468835d")); + "-o %s --allowMissingVCFHeaders", 1, Arrays.asList("f301b089d21da259873f04bdc468835d")); + spec.disableShadowBCF(); executeTest("test BeagleInput", spec); } @@ -59,8 +61,9 @@ public class BeagleIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ "--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+ - "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2, + "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 --allowMissingVCFHeaders -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2, Arrays.asList("660986891b30cdc937e0f2a3a5743faa","4b6417f892ccfe5c63b8a60cb0ef3740")); + spec.disableShadowBCF(); executeTest("test BeagleInputWithBootstrap",spec); } @@ -72,8 +75,8 @@ public class BeagleIntegrationTest extends WalkerTest { "--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ "--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ "--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ - "-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01")); - + "-L 20:1-70000 -o %s --no_cmdline_in_header --allowMissingVCFHeaders",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01")); + spec.disableShadowBCF(); executeTest("testBeagleChangesSitesToRef",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index f886651f5..bd6af5337 100755 --- 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -15,80 +15,80 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928")); + baseTestString() + " --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("49471b44ac165929d3ff81f98ce19063")); executeTest("test no action", spec); } @Test public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -window 10 --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4a4596929f9fe983d8868ca142567781")); + baseTestString() + " -window 10 --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("8b45895d7ae1f36b70e7fd26aa9451d3")); executeTest("test clustered SNPs", spec); } @Test public void testMask1() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseTestString() + " -maskName foo --mask:VCF3 " + testDir + "vcfexample2.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("1719462cd17986c33e59e45b69df0270")); + baseTestString() + " -maskName foo --mask " + testDir + "vcfexample2.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("06307029f5da87ae4edd9804063a98f9")); executeTest("test mask all", spec1); } @Test public void testMask2() { WalkerTestSpec spec2 = new WalkerTestSpec( - baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("db19ff7d90c82cda09fb3c3878100eb5")); + baseTestString() 
+ " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("1fd06f6b2642685093ed36342f002b58")); executeTest("test mask some", spec2); } @Test public void testMask3() { WalkerTestSpec spec3 = new WalkerTestSpec( - baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("a9e417cba21585c786d4b9930265ea31")); + baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("d8c5206d5d13477a5929fb1ae5a6bfc4")); executeTest("test mask extend", spec3); } @Test public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4160904b180d1f62a6bf50de6728ce00")); + baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("a3be095e8aa75d9ef4235b9487527307")); executeTest("test filter #1", spec); } @Test public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("df80db30c7836731ac7c8c3d4fc005b4")); + baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("bd1361ddc52d73b8cd7adeb9e5c47200")); executeTest("test filter #2", spec); } @Test public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " 
--filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("71ce6c0952831cb68f575aa0173dce2b")); + baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("4a43ec0285433df426ab482f88cf7ca6")); executeTest("test filter with separate names #2", spec); } @Test public void testGenotypeFilters1() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("179f7f2a90c0e6c656109aac9b775476")); + baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("5ee4485a022e163645c08b9691384f67")); executeTest("test genotype filter #1", spec1); } @Test public void testGenotypeFilters2() { WalkerTestSpec spec2 = new WalkerTestSpec( - baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("22e07c27feb9017a130dfb045c5b29b9")); + baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("d0a068c8cfb0758d2a8d471383f39b68")); executeTest("test genotype filter #2", spec2); } @@ -96,7 +96,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testDeletions() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + testDir + "twoDeletions.vcf", 1, - Arrays.asList("637256ee5348c1c57f1dadf581b06ed9")); + Arrays.asList("a1c02a5a90f1262e9eb3d2cad1fd08f2")); 
executeTest("test deletions", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 422f2a524..477297a21 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -8,6 +8,7 @@ import org.testng.annotations.Test; import java.io.File; import java.util.Arrays; import java.util.List; +import java.util.Map; // ********************************************************************************** // // Note that this class also serves as an integration test for the VariantAnnotator! // @@ -28,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("1c6ea045819b151bcd9d98947c5d4c4d")); + Arrays.asList("a4c520b56f85513423c1c0204cabb5e1")); executeTest("test MultiSample Pilot1", spec); } @@ -36,7 +37,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("f9f2912c63e3253495702099bde5de0f")); + Arrays.asList("26ec9db9c7ad4b9a2ef25a8b1cb0d45c")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -44,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { 
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("c51d037e0b1cd0ed3a1cd6c6b29646cf")); + Arrays.asList("f2624782525929384d9f2c59f3c65529")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -52,7 +53,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("bd7d25f6c6142837e3bc4c0d5dced2ed")); + Arrays.asList("a71d4abbad9c31e66aeb21b1fe2cfe9a")); executeTest("test SingleSample Pilot2", spec); } @@ -60,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1, - Arrays.asList("dba580e8b5e96a28d673b437b4da1c70")); + Arrays.asList("2429c0f24da57ab1a1313e807e53e48e")); executeTest("test Multiple SNP alleles", spec); } @@ -68,7 +69,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testBadRead() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, - Arrays.asList("174905e2547e94c3eee07ce84497692b")); + Arrays.asList("995c8f57d1f211e004ce81d356a80d16")); executeTest("test bad read", 
spec); } @@ -76,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("29b15e2017b13e6cb3ad56cc74c719e7")); + Arrays.asList("53f60fe15ebffdf85183426b93d48b10")); executeTest("test reverse trim", spec); } @@ -86,7 +87,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "6f14394e90fdacd29390a1f3521f5ca8"; + private final static String COMPRESSED_OUTPUT_MD5 = "65846f5a8591d591ffbd1f85afadd9d5"; @Test public void testCompressedOutput() { @@ -107,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "7824468b8290ffb7795a1ec3e493c1a4"; + String md5 = "36e6c8b0f30b159915eedaa5926ebbad"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -139,7 +140,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("86121f5094f26c8b2e320c1f5dea4ae3")); + Arrays.asList("efc4882c1150b246be163e08d81f428f")); executeTest("test min_base_quality_score 26", spec); } @@ -147,7 +148,7 @@ public 
class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("3712dd35b0e630977c8c5226ccc532ae")); + Arrays.asList("b47b08b514acf5e96fb4994754e0e9ce")); executeTest("test SLOD", spec); } @@ -155,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("8a5bb0cca3004848dbca9c08fc2afed9")); + Arrays.asList("08db1413ed6a04fcb03d58e3ece9f366")); executeTest("test NDA", spec); } @@ -163,23 +164,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("9863ecd2576c7a962f9d05a7dc670169")); + Arrays.asList("d78f95c225db2a4b21c99a688330df52")); executeTest("test using comp track", spec); } @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "fe204cef499e5aceb2732ba2e45903ad"); + testOutputParameters("-sites_only", "1e4a98213ec00479cc090f53620317e4"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1ab8b68891d1531923a40d594250e8e0"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "5580e3e7c2b358ed416bc03409c54c1d"); } @Test public void 
testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "ab179ef6ece3ab9e6b1ff5800cb89ebd"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "23ab7f15a01dd6dbf9f09a7560a2055b"); } private void testOutputParameters(final String args, final String md5) { @@ -193,7 +194,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("afdba62ff773ee77dd5ec947f7cf280f")); + Arrays.asList("87c55fece67a562d208c538868307d7b")); executeTest("test confidence 1", spec1); } @@ -201,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("d81007a1718d2e16c2d8cd5bbc0d7bf3")); + Arrays.asList("87c55fece67a562d208c538868307d7b")); executeTest("test confidence 2", spec2); } @@ -212,12 +213,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "7f3fcbe491284b321d6b92ef197644c3" ); + testHeterozosity( 0.01, "481b17c5541f758a49f84263e5b0f795" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "04d970a174dcfaccab58f2943326251d" ); + testHeterozosity( 1.0 / 1850, "70ad4b50a22de917eb91a95ca191eb17" ); } private void testHeterozosity(final double arg, final String md5) { @@ -241,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 
1:10,000,000-10,100,000", 1, - Arrays.asList("ba5b511efd3d99575620f14ba2ba259e")); + Arrays.asList("3d20dbf7912e49cdfa929eb04840d351")); executeTest(String.format("test multiple technologies"), spec); } @@ -260,7 +261,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("4c71fbe45faf6e2b7da0eb8ae9dd0c0f")); + Arrays.asList("0b141419428831b598813272cb7af055")); executeTest(String.format("test calling with BAQ"), spec); } @@ -279,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("e6c116225319f505d680beeeb2063bf1")); + Arrays.asList("9a54f9f820efa74e5a719e5ca44bc04d")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -294,7 +295,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("d46e1f465c649927fb3c4ec85df35d09")); + Arrays.asList("7f6c4e55b8e77c19199e8ad8b3594280")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -307,7 +308,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("0b9d7998f222e55e82c1a7022d62a508")); + Arrays.asList("7ef98a593945f0269ac2d29982a2a72b")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -317,7 +318,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("9bd02b2c648695138f2645e955bf4d8d")); + Arrays.asList("656185ebade2db034441c787d6a363c1")); executeTest("test MultiSample Pilot2 
indels with alleles passed in", spec); } @@ -327,7 +328,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("59e874d76e42eafd98ad961eb70706bc")); + Arrays.asList("eb9624642e814a0b8962acc89422be23")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -335,13 +336,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("e84f82e12deb9773dae21595b3531a07")); + Arrays.asList("e7b471d2a0eada2c7f37f120f2f1fa88")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("b4df2bf0d820c6fc11fabcafe18bb769")); + Arrays.asList("5c7db047ae9417d37c6bbda1d8ea6019")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -351,7 +352,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1, - 
Arrays.asList("95226301a014347efc90e5f750a0db60")); + Arrays.asList("e362dc0488c8ee3013fa636d929db688")); executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec); } @@ -384,7 +385,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("a3ea0eea74f2031ebb2ea0edfa14c945")); + Arrays.asList("3c9786453eb59013c70d99ee74f957a9")); executeTest("test minIndelFraction 0.0", spec); } @@ -392,7 +393,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("59c8f66eadd45c56f09291bf64f611e1")); + Arrays.asList("72b82f04dd7f9b9318ef7f8604f8085a")); executeTest("test minIndelFraction 0.25", spec); } @@ -400,7 +401,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction100() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 1", 1, - Arrays.asList("c1911f6ede7b4e8e83209ead66329596")); + Arrays.asList("0e9f485edabbed613e50c699cfa8822f")); executeTest("test minIndelFraction 1.0", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index b5431c519..f0fb01892 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { return "-T ReadBackedPhasing" + " -R " + reference + " -I " + 
validationDataLocation + reads + - " --variant " + validationDataLocation + VCF + + " --variant " + ( VCF.contains("phasing_test") ? testDir : validationDataLocation) + VCF + " --cacheWindowSize " + cacheWindowSize + " --maxPhaseSites " + maxPhaseSites + " --phaseQualityThresh " + phaseQualityThresh + @@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:332341-382503", 1, - Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba")); + Arrays.asList("442c819569417c1b7d6be9f41ce05394")); executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec); } @@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:1232503-1332503", 1, - Arrays.asList("f7517896c899a872c24d8e823ac9deae")); + Arrays.asList("2a51ee7d3c024f2410dcee40c5412993")); executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec); } @@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30) + " -L chr20:332341-382503", 1, - Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c")); + Arrays.asList("85bc9b03e24159f746dbd0cb988f9ec8")); executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec); } @@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100) + " -L chr20:332341-382503", 1, - Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59")); + Arrays.asList("96bb413a83c777ebbe622438e4565e8f")); executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec); } @@ 
-66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10) + " -L chr20:332341-482503", 1, - Arrays.asList("6163a1fba27532da77765a7a11c55332")); + Arrays.asList("7d2402f055d243e2208db9ea47973e13")); executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec); } @@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10) + " -L chr20:652810-681757", 1, - Arrays.asList("94f2fc24c3ac1ddbecb2e0bf7ed1597c")); + Arrays.asList("72682b3f27c33580d2d4515653ba6de7")); executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 356edbb4b..d93b7c4d9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -271,7 +271,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -knownSites:anyNameABCD,VCF3 " + testDir + "vcfexample3.vcf" + + " -knownSites:anyNameABCD,VCF " + testDir + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + " -knownSites " + b36dbSNP129 + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 3acf8ea25..239c2fac8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -133,7 +133,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -o %s" + " -tranchesFile " + testDir + "VQSR.mixedTest.tranches" + " -recalFile " + testDir + "VQSR.mixedTest.recal", - Arrays.asList("1370d7701a6231633d43a8062b7aff7f")); + Arrays.asList("beadf841bbf39c3f0d0bc7fb55462b37")); executeTest("testApplyRecalibrationSnpAndIndelTogether", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index dce60e228..b4ff275d8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -72,7 +72,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { public void combinePLs(String file1, String file2, String md5) { WalkerTestSpec spec = new WalkerTestSpec( - "-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 " + validationDataLocation + file2, + "-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + testDir + file1 + " -V:v2 " + testDir + file2, 1, Arrays.asList(md5)); executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); @@ -120,8 +120,8 @@ public class CombineVariantsIntegrationTest 
extends WalkerTest { String file2 = "combine.2.vcf"; WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants --no_cmdline_in_header -o %s -R " + b37KGReference - + " -V:one " + validationDataLocation + file1 - + " -V:two " + validationDataLocation + file2 + args, + + " -V:one " + testDir + file1 + + " -V:two " + testDir + file2 + args, 1, Arrays.asList(md5)); executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index cb541c895..e44d47438 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -40,14 +40,14 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd")); + Arrays.asList("b8f4171b0c39954b283dfed4afed87d7")); executeTest("test b36 to hg19", spec); } @Test public void testb36Tohg19UnsortedSamples() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T 
LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010")); executeTest("test b36 to hg19, unsorted samples", spec); @@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + testDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, - Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b")); + Arrays.asList("e0b813ff873185ab51995a151f80ec98")); executeTest("test hg18 to hg19, unsorted", spec); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 1acf33f0b..1288e97d7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -122,7 +122,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("9162a67ccb4201c0542f30d14967f2d5") + Arrays.asList("2a0436eecc2bc29fe559e4d1b9e13580") ); executeTest("testUsingDbsnpName--" + testFile, spec); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java 
b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index fe2f07a1b..067a8ff9c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -35,7 +35,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("d2604faad0613932453395c54cc68369")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("355b029487c3b4c499140d71310ca37e")); executeTest("Test reading and writing breakpoint VCF", spec1); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 4e765c4e1..48939ffe8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -370,6 +370,35 @@ public class VariantContextTestProvider { GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)), GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1))); } + + + // + // + // TESTING PHASE + // + // + final Genotype gUnphased = new GenotypeBuilder("gUnphased", Arrays.asList(ref, alt1)).make(); + final Genotype gPhased = new GenotypeBuilder("gPhased", Arrays.asList(ref, alt1)).phased(true).make(); + final Genotype gPhased2 = new GenotypeBuilder("gPhased2", Arrays.asList(alt1, alt1)).phased(true).make(); + final Genotype gPhased3 = new GenotypeBuilder("gPhased3", Arrays.asList(ref, ref)).phased(true).make(); + final Genotype haploidNoPhase = new GenotypeBuilder("haploidNoPhase", Arrays.asList(ref)).make(); + addGenotypeTests(site, 
gUnphased, gPhased); + addGenotypeTests(site, gUnphased, gPhased2); + addGenotypeTests(site, gUnphased, gPhased3); + addGenotypeTests(site, gPhased, gPhased2); + addGenotypeTests(site, gPhased, gPhased3); + addGenotypeTests(site, gPhased2, gPhased3); + addGenotypeTests(site, haploidNoPhase, gPhased); + addGenotypeTests(site, haploidNoPhase, gPhased2); + addGenotypeTests(site, haploidNoPhase, gPhased3); + addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2); + addGenotypeTests(site, haploidNoPhase, gPhased, gPhased3); + addGenotypeTests(site, haploidNoPhase, gPhased2, gPhased3); + addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2, gPhased3); + + final Genotype gUnphasedTet = new GenotypeBuilder("gUnphasedTet", Arrays.asList(ref, alt1, ref, alt1)).make(); + final Genotype gPhasedTet = new GenotypeBuilder("gPhasedTet", Arrays.asList(ref, alt1, alt1, alt1)).phased(true).make(); + addGenotypeTests(site, gUnphasedTet, gPhasedTet); } if ( ENABLE_PL_TESTS ) { @@ -484,8 +513,6 @@ public class VariantContextTestProvider { new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(), new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make()); } - - // TODO -- test test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample } private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) { @@ -649,7 +676,6 @@ public class VariantContextTestProvider { // filters are the same Assert.assertEquals(actual.getFilters(), expected.getFilters()); Assert.assertEquals(actual.isFiltered(), expected.isFiltered()); - Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied()); // inline attributes Assert.assertEquals(actual.getDP(), expected.getDP());