From 71b47a6148778b4ff0067ff6fba089702e8a11fc Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 5 Feb 2014 22:01:12 -0500 Subject: [PATCH 1/3] Rename CombineReferenceCalculationVariants to GenotypeGVCFs --- ...ReferenceCalculationVariants.java => GenotypeGVCFs.java} | 6 +++--- ...tegrationTest.java => GenotypeGVCFsIntegrationTest.java} | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) rename protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/{CombineReferenceCalculationVariants.java => GenotypeGVCFs.java} (98%) rename protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/{CombineReferenceCalculationVariantsIntegrationTest.java => GenotypeGVCFsIntegrationTest.java} (97%) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariants.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java similarity index 98% rename from protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariants.java rename to protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java index c702b3548..07072c4c2 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariants.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java @@ -79,7 +79,7 @@ import java.util.*; * Combines gVCF records that were produced by the Haplotype Caller from single sample sources. * *

- * CombineReferenceCalculationVariants combines gVCF records that were produced as part of the "single sample discovery" + * GenotypeGVCFs combines gVCF records that were produced as part of the "single sample discovery" * pipeline using the '-ERC GVCF' mode of the Haplotype Caller. This tools performs the multi-sample joint aggregation * step and merges the records together in a sophisticated manner. * @@ -101,7 +101,7 @@ import java.util.*; *

  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
- *   -T CombineReferenceCalculationVariants \
+ *   -T GenotypeGVCFs \
  *   --variant input1.vcf \
  *   --variant input2.vcf \
  *   -o output.vcf
@@ -110,7 +110,7 @@ import java.util.*;
  */
 @DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
 @Reference(window=@Window(start=-10,stop=10))
-public class CombineReferenceCalculationVariants extends RodWalker implements AnnotatorCompatible, TreeReducible {
+public class GenotypeGVCFs extends RodWalker implements AnnotatorCompatible, TreeReducible {
 
     /**
      * The VCF files to merge together
diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariantsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
similarity index 97%
rename from protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariantsIntegrationTest.java
rename to protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
index 4a34793ee..f3a56355e 100644
--- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineReferenceCalculationVariantsIntegrationTest.java
+++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java
@@ -51,10 +51,10 @@ import org.testng.annotations.Test;
 
 import java.util.Arrays;
 
-public class CombineReferenceCalculationVariantsIntegrationTest extends WalkerTest {
+public class GenotypeGVCFsIntegrationTest extends WalkerTest {
 
     private static String baseTestString(String args, String ref) {
-        return "-T CombineReferenceCalculationVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + ref + args;
+        return "-T GenotypeGVCFs --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + ref + args;
     }
 
     @Test(enabled = true)

From 2648219c42fca650131da7c18d9aa78086f26290 Mon Sep 17 00:00:00 2001
From: Eric Banks 
Date: Thu, 6 Feb 2014 01:32:24 -0500
Subject: [PATCH 2/3] Implementation of a hierarchical merger for gVCFs, called
 CombineGVCFs.

This tool will take any number of gVCFs and create a merged gVCF (as opposed to
GenotypeGVCFs which produces a standard VCF).

Added unit/integration tests and fixed up GATK docs.
---
 .../walkers/variantutils/CombineGVCFs.java    | 283 ++++++++++++++++++
 .../walkers/variantutils/GenotypeGVCFs.java   |  22 +-
 .../CombineGVCFsIntegrationTest.java          | 168 +++++++++++
 .../GenotypeGVCFsIntegrationTest.java         |   2 +-
 .../variant/GATKVariantContextUtils.java      |  26 +-
 .../GATKVariantContextUtilsUnitTest.java      |   2 +-
 6 files changed, 484 insertions(+), 19 deletions(-)
 create mode 100644 protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java
 create mode 100644 protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java

diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java
new file mode 100644
index 000000000..a4c64fa39
--- /dev/null
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java
@@ -0,0 +1,283 @@
+/*
+*  By downloading the PROGRAM you agree to the following terms of use:
+*
+*  BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+*  This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+*  WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+*  WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+*  NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+*  1. DEFINITIONS
+*  1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
+*
+*  2. LICENSE
+*  2.1   Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+*  The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only.  For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+*  2.2  No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD.  LICENSEE shall ensure that all of its users agree to the terms of this Agreement.  LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+*  2.3  License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+*  3. OWNERSHIP OF INTELLECTUAL PROPERTY
+*  LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies.  LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+*  Copyright 2012 Broad Institute, Inc.
+*  Notice of attribution:  The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+*  LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+*  4. INDEMNIFICATION
+*  LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+*  5. NO REPRESENTATIONS OR WARRANTIES
+*  THE PROGRAM IS DELIVERED AS IS.  BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+*  IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+*  6. ASSIGNMENT
+*  This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+*  7. MISCELLANEOUS
+*  7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+*  7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+*  7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+*  7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested.  All notices under this Agreement shall be deemed effective upon receipt.
+*  7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+*  7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+*  7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.gatk.walkers.variantutils;
+
+import org.broadinstitute.sting.commandline.*;
+import org.broadinstitute.sting.gatk.CommandLineGATK;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.Reference;
+import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.gatk.walkers.Window;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.SampleUtils;
+import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.help.HelpConstants;
+import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
+import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
+import org.broadinstitute.variant.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.variant.vcf.*;
+
+import java.util.*;
+
+/**
+ * Combines any number of gVCF files that were produced by the Haplotype Caller into a single joint gVCF file.
+ *
+ * 

+ * CombineGVCFs is meant to be used for hierarchical merging of gVCFs that will eventually be input into GenotypeGVCFs. + * One would use this tool when needing to genotype too large a number of individual gVCFs; instead of passing them + * all in to GenotypeGVCFs, one would first use CombineGVCFs on smaller batches of samples and then pass these combined + * gVCFs to GenotypeGVCFs. + * + * Note that this tool cannot work with just any gVCF files - they must have been produced with the Haplotype Caller + * as part of the "single sample discovery" pipeline using the '-ERC GVCF' mode, which uses a sophisticated reference + * model to produce accurate genotype likelihoods for every position in the target. + * + *

Input

+ *

+ * One or more Haplotype Caller gVCFs to combine. + *

+ * + *

Output

+ *

+ * A combined gVCF. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T CombineGVCFs \
+ *   --variant gvcf1.vcf \
+ *   --variant gvcf2.vcf \
+ *   -o mergeGvcf.vcf
+ * 
+ * + */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) +@Reference(window=@Window(start=0,stop=1)) +public class CombineGVCFs extends RodWalker> { + + protected final class PositionalState { /* immutable bundle handed from map() to reduce(): the VariantContexts the tracker returned for a locus, the bases from ref.getBases(), and the locus itself */ + final List VCs; + final byte[] refBases; + final GenomeLoc loc; + public PositionalState(final List VCs, final byte[] refBases, final GenomeLoc loc) { + this.VCs = VCs; + this.refBases = refBases; + this.loc = loc; + } + } + + /** + * The gVCF files to merge together + */ + @Input(fullName="variant", shortName = "V", doc="One or more input gVCF files", required=true) + public List> variantCollections; + final private List> variants = new ArrayList<>(); + + @Output(doc="File to which the combined gVCF should be written") + protected VariantContextWriter vcfWriter = null; + + private GenomeLocParser genomeLocParser; + + public void initialize() { + // take care of the VCF headers + final Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); + final Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true); + + final Set samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + final VCFHeader vcfHeader = new VCFHeader(headerLines, samples); + vcfWriter.writeHeader(vcfHeader); + + // collect the actual rod bindings into a list for use later + for ( final RodBindingCollection variantCollection : variantCollections ) + variants.addAll(variantCollection.getRodBindings()); + + genomeLocParser = getToolkit().getGenomeLocParser(); + } + + public PositionalState map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + if ( tracker == null ) // RodWalkers can make funky map calls + return null; + + final GenomeLoc loc = ref.getLocus(); + return new PositionalState(tracker.getValues(variants, loc), ref.getBases(), loc); + } + + public LinkedList reduceInit() { + return new LinkedList<>(); + } + + public LinkedList 
reduce(final PositionalState startingStates, final LinkedList previousState) { /* when records start at this locus, first cut every pending record at currentPos - 1 and queue the new ones; then, if any pending record ends at currentPos, cut all pending records there */ + if ( startingStates == null ) + return previousState; + + final int currentPos = startingStates.loc.getStart(); + + if ( !startingStates.VCs.isEmpty() ) { + endPreviousStates(previousState, currentPos - 1, startingStates.refBases[0]); + previousState.addAll(startingStates.VCs); + } + + if ( containsEndingContext(previousState, currentPos) ) { + endPreviousStates(previousState, currentPos, startingStates.refBases.length > 1 ? startingStates.refBases[1] : (byte)'N'); /* the base after currentPos becomes the reference allele of any split remainder; N is used when refBases holds a single base */ + } + + return previousState; + } + + /** + * Does the given list of VariantContexts contain any whose context ends at the given position? + * + * @param VCs list of VariantContexts + * @param pos the position to check against + * @return true if there are one or more VCs that end at pos, false otherwise + */ + private boolean containsEndingContext(final List VCs, final int pos) { + if ( VCs == null ) throw new IllegalArgumentException("The list of VariantContexts cannot be null"); + + for ( final VariantContext vc : VCs ) { + if ( vc.getEnd() == pos ) + return true; + } + return false; + } + + /** + * Disrupt the VariantContexts so that they all stop at the given pos, write them out, and put the remainder back in the list. 
+ * + * @param VCs list of VariantContexts + * @param pos the target ending position + * @param refBase the reference base to use at the position AFTER pos + */ + private void endPreviousStates(final LinkedList VCs, final int pos, final byte refBase) { + if ( VCs == null ) throw new IllegalArgumentException("The list of VariantContexts cannot be null"); + + final List stoppedVCs = new ArrayList<>(VCs.size()); + + for ( int i = VCs.size() - 1; i >= 0; i-- ) { + final VariantContext vc = VCs.get(i); + if ( vc.getStart() > pos ) + continue; + + // if it was ending anyways, then just remove it as is; + // note that for the purposes of this method, deletions are considered to be single base events (as opposed + // to ref blocks), hence the check for the number of alleles (because we know there will always be a <NON_REF> allele) + if ( vc.getNAlleles() > 2 || vc.getEnd() == pos ) { + stoppedVCs.add(vc); + VCs.remove(i); + } + // otherwise we need to split it into two pieces + else { + // the first half + final Map attrs = new HashMap<>(vc.getAttributes()); + if ( attrs.containsKey(VCFConstants.END_KEY) ) + attrs.put(VCFConstants.END_KEY, Integer.toString(pos)); + stoppedVCs.add(new VariantContextBuilder(vc).stop(pos).attributes(attrs).make()); + + // the second half + final Allele refAllele = Allele.create(refBase, true); + final List alleles = new ArrayList<>(); + alleles.add(refAllele); + alleles.addAll(vc.getAlternateAlleles()); + final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples()); + for ( final Genotype g : vc.getGenotypes() ) + genotypes.add(new GenotypeBuilder(g).alleles(Arrays.asList(refAllele, refAllele)).make()); + VCs.set(i, new VariantContextBuilder(vc).start(pos + 1).alleles(alleles).genotypes(genotypes).make()); + } + } + + if ( !stoppedVCs.isEmpty() ) { + final VariantContext mergedVC = mergeVCs(stoppedVCs); + vcfWriter.add(mergedVC); + } + } + + /** + * Combine (and re-annotate) a list of VariantContexts + * + * @param VCs the 
VariantContexts to merge + * @return a new VariantContext + */ + private VariantContext mergeVCs(final List VCs) { + // we need the specialized merge if the site contains anything other than ref blocks + if ( containsTrueAltAllele(VCs) ) + return GATKVariantContextUtils.referenceConfidenceMerge(VCs, genomeLocParser.createGenomeLoc(VCs.get(0)), null, false); + + // otherwise we can drop down to the generic simple merge + return GATKVariantContextUtils.simpleMerge(VCs, null, VCs.size(), + GATKVariantContextUtils.FilteredRecordMergeType.KEEP_UNCONDITIONAL, + GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false); + } + + /** + * Does the given list of VariantContexts contain any with an alternate allele other than <NON_REF>? + * + * @param VCs list of VariantContexts + * @return true if there are one or more VCs that contain a true alternate allele, false otherwise + */ + private boolean containsTrueAltAllele(final List VCs) { + if ( VCs == null ) throw new IllegalArgumentException("The list of VariantContexts cannot be null"); + + for ( final VariantContext vc : VCs ) { + if ( vc.getNAlleles() > 2 ) + return true; + } + return false; + } + + @Override + public void onTraversalDone(final LinkedList state) { + // there shouldn't be any state left unless the user cut in the middle of a gVCF block + if ( !state.isEmpty() ) + logger.warn("You have asked for an interval that cuts in the middle of one or more gVCF blocks. 
Please note that this will cause you to lose records that don't end within your interval."); + } +} diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java index 07072c4c2..4620481af 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java @@ -76,25 +76,27 @@ import org.broadinstitute.variant.vcf.*; import java.util.*; /** - * Combines gVCF records that were produced by the Haplotype Caller from single sample sources. + * Genotypes any number of gVCF files that were produced by the Haplotype Caller into a single joint VCF file. * *

- * GenotypeGVCFs combines gVCF records that were produced as part of the "single sample discovery" - * pipeline using the '-ERC GVCF' mode of the Haplotype Caller. This tools performs the multi-sample joint aggregation + * GenotypeGVCFs merges gVCF records that were produced as part of the "single sample discovery" pipeline using + * the '-ERC GVCF' mode of the Haplotype Caller. This tool performs the multi-sample joint aggregation * step and merges the records together in a sophisticated manner. * * At all positions of the target, this tool will combine all spanning records, produce correct genotype likelihoods, * re-genotype the newly merged record, and then re-annotate it. * + * Note that this tool cannot work with just any gVCF files - they must have been produced with the Haplotype Caller, + * which uses a sophisticated reference model to produce accurate genotype likelihoods for every position in the target. * *

Input

*

- * One or more Haplotype Caller gVCFs to combine. + * One or more Haplotype Caller gVCFs to genotype. *

* *

Output

*

- * A combined VCF. + * A combined, genotyped VCF. *

* *

Examples

@@ -102,8 +104,8 @@ import java.util.*; * java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ * -T GenotypeGVCFs \ - * --variant input1.vcf \ - * --variant input2.vcf \ + * --variant gvcf1.vcf \ + * --variant gvcf2.vcf \ * -o output.vcf *
* @@ -113,9 +115,9 @@ import java.util.*; public class GenotypeGVCFs extends RodWalker implements AnnotatorCompatible, TreeReducible { /** - * The VCF files to merge together + * The gVCF files to merge together */ - @Input(fullName="variant", shortName = "V", doc="One or more input VCF files", required=true) + @Input(fullName="variant", shortName = "V", doc="One or more input gVCF files", required=true) public List> variantCollections; final private List> variants = new ArrayList<>(); @@ -183,7 +185,7 @@ public class GenotypeGVCFs extends RodWalker allVCs = GATKVCFUtils.readVCF(gVCF).getSecond(); + + Assert.assertEquals(allVCs.size(), 2, "Observed: " + allVCs); + + final VariantContext first = allVCs.get(0); + Assert.assertEquals(first.getStart(), 69491); + Assert.assertEquals(first.getEnd(), 69497); + Assert.assertEquals(first.getGenotypes().size(), 2); + Assert.assertTrue(first.getGenotype("NA1").isCalled()); + Assert.assertTrue(first.getGenotype("NA2").isNoCall()); + + final VariantContext second = allVCs.get(1); + Assert.assertEquals(second.getStart(), 69498); + Assert.assertEquals(second.getEnd(), 69506); + Assert.assertEquals(second.getGenotypes().size(), 2); + Assert.assertTrue(second.getGenotype("NA1").isCalled()); + Assert.assertTrue(second.getGenotype("NA2").isCalled()); + } + + @Test + public void testTwoSpansManyBlocksInOne() throws Exception { + final String cmd = baseTestString(" -L 1:69512-69634"); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("")); + spec.disableShadowBCF(); + final File gVCF = executeTest("testTwoSpansManyBlocksInOne", spec).first.get(0); + final List allVCs = GATKVCFUtils.readVCF(gVCF).getSecond(); + + Assert.assertEquals(allVCs.size(), 5); + } + + @Test + public void testOneHasAltAndTwoHasNothing() throws Exception { + final String cmd = baseTestString(" -L 1:69511"); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("")); + spec.disableShadowBCF(); + final File gVCF = 
executeTest("testOneHasAltAndTwoHasNothing", spec).first.get(0); + final List allVCs = GATKVCFUtils.readVCF(gVCF).getSecond(); + + Assert.assertEquals(allVCs.size(), 1); + + final VariantContext first = allVCs.get(0); + Assert.assertEquals(first.getStart(), 69511); + Assert.assertEquals(first.getEnd(), 69511); + Assert.assertEquals(first.getGenotypes().size(), 2); + Assert.assertTrue(first.getGenotype("NA1").isCalled()); + Assert.assertTrue(first.getGenotype("NA2").isNoCall()); + } + + @Test + public void testOneHasAltAndTwoHasRefBlock() throws Exception { + final String cmd = baseTestString(" -L 1:69635"); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("")); + spec.disableShadowBCF(); + final File gVCF = executeTest("testOneHasAltAndTwoHasRefBlock", spec).first.get(0); + final List allVCs = GATKVCFUtils.readVCF(gVCF).getSecond(); + + Assert.assertEquals(allVCs.size(), 1); + + final VariantContext first = allVCs.get(0); + Assert.assertEquals(first.getStart(), 69635); + Assert.assertEquals(first.getEnd(), 69635); + Assert.assertEquals(first.getNAlleles(), 3); + Assert.assertEquals(first.getGenotypes().size(), 2); + Assert.assertTrue(first.getGenotype("NA1").isHet()); + } + + @Test + public void testOneHasDeletionAndTwoHasRefBlock() throws Exception { + final String cmd = baseTestString(" -L 1:69772-69783"); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("")); + spec.disableShadowBCF(); + final File gVCF = executeTest("testOneHasDeletionAndTwoHasRefBlock", spec).first.get(0); + final List allVCs = GATKVCFUtils.readVCF(gVCF).getSecond(); + + Assert.assertEquals(allVCs.size(), 2); + + final VariantContext first = allVCs.get(0); + Assert.assertEquals(first.getStart(), 69772); + Assert.assertEquals(first.getEnd(), 69776); + Assert.assertEquals(first.getNAlleles(), 3); + Assert.assertEquals(first.getGenotypes().size(), 2); + Assert.assertTrue(first.getGenotype("NA1").isHet()); + + final VariantContext second = allVCs.get(1); 
+ Assert.assertEquals(second.getStart(), 69773); + Assert.assertEquals(second.getEnd(), 69783); + Assert.assertEquals(second.getGenotypes().size(), 2); + Assert.assertTrue(second.getGenotype("NA1").isHomRef()); + } + + @Test + public void testMD5s() throws Exception { + final String cmd = baseTestString(" -L 1:69485-69791"); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d90227fd360761d9534b1080b17159dd")); + spec.disableShadowBCF(); + executeTest("testMD5s", spec); + } +} diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index f3a56355e..0478f3377 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -65,7 +65,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-20,000,000", b37KGReference), 1, - Arrays.asList("66e3b512de9de64b03c708386736cc2f")); + Arrays.asList("10670f6f04d3d662aa38c20ac74af35c")); executeTest("combineSingleSamplePipelineGVCF", spec); } diff --git a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index 5853ae0ef..89fb2f738 100644 --- a/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/gatk-framework/src/main/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -1041,9 +1041,10 @@ public class GATKVariantContextUtils { * @param VCs collection of 
unsorted genomic VCs * @param loc the current location * @param refBase the reference allele to use if all contexts in the VC are spanning (i.e. don't start at the location in loc); if null, we'll return null in this case + * @param removeNonRefSymbolicAllele if true, remove the <NON_REF> allele from the merged VC * @return new VariantContext representing the merge of all VCs or null if it is not relevant */ - public static VariantContext referenceConfidenceMerge(final List VCs, final GenomeLoc loc, final Byte refBase) { + public static VariantContext referenceConfidenceMerge(final List VCs, final GenomeLoc loc, final Byte refBase, final boolean removeNonRefSymbolicAllele) { // this can happen if e.g. you are using a dbSNP file that spans a region with no gVCFs if ( VCs == null || VCs.size() == 0 ) return null; @@ -1059,6 +1060,9 @@ public class GATKVariantContextUtils { // alt alleles final AlleleMapper alleleMapper = determineAlternateAlleleMapping(VCs, refAllele, loc); + // the allele list will not include the <NON_REF> symbolic allele, so add it if needed + if ( !removeNonRefSymbolicAllele ) + alleleMapper.map.put(NON_REF_SYMBOLIC_ALLELE, NON_REF_SYMBOLIC_ALLELE); final List alleles = getAllelesListFromMapper(refAllele, alleleMapper); final Map attributes = new LinkedHashMap<>(); @@ -1079,6 +1083,8 @@ public class GATKVariantContextUtils { // special case DP (add it up) for all events if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) ) depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0); + else if ( vc.getNSamples() == 1 && vc.getGenotype(0).hasExtendedAttribute("MIN_DP") ) // handle the gVCF case from the HaplotypeCaller + depth += vc.getGenotype(0).getAttributeAsInt("MIN_DP", 0); if ( isSpanningEvent ) continue; @@ -1106,7 +1112,7 @@ public class GATKVariantContextUtils { final String ID = rsIDs.isEmpty() ? 
VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs); final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID).alleles(alleles) - .chr(loc.getContig()).start(loc.getStart()).computeEndFromAlleles(alleles, loc.getStart()) + .chr(loc.getContig()).start(loc.getStart()).computeEndFromAlleles(alleles, loc.getStart(), loc.getStart()) .genotypes(genotypes).unfiltered().attributes(new TreeMap<>(attributes)).log10PError(CommonInfo.NO_LOG10_PERROR); // we will need to regenotype later return builder.make(); @@ -1500,17 +1506,23 @@ public class GATKVariantContextUtils { final List remappedAlleles, final List targetAlleles) { for ( final Genotype g : VC.getGenotypes() ) { - if ( !g.hasPL() ) - throw new UserException("cannot merge genotypes from samples without PLs; sample " + g.getSampleName() + " does not have likelihoods at position " + VC.getChr() + ":" + VC.getStart()); - // only add if the name is new final String name = g.getSampleName(); if ( !mergedGenotypes.containsSample(name) ) { + + if ( !g.hasPL() ) { + if ( g.isNoCall() ) { + mergedGenotypes.add(g); + continue; + } + throw new UserException("cannot merge genotypes from samples without PLs; sample " + g.getSampleName() + " does not have likelihoods at position " + VC.getChr() + ":" + VC.getStart()); + } + // we need to modify it even if it already contains all of the alleles because we need to purge the PLs out anyways final int[] indexesOfRelevantAlleles = getIndexesOfRelevantAlleles(remappedAlleles, targetAlleles, VC.getStart()); final int[] PLs = generatePLs(g, indexesOfRelevantAlleles); - // note that we set the alleles to null here (as we expect it to be re-genotyped) - final Genotype newG = new GenotypeBuilder(g).name(name).alleles(null).PL(PLs).noAD().noGQ().make(); + + final Genotype newG = new GenotypeBuilder(g).name(name).alleles(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).PL(PLs).noAD().noGQ().make(); mergedGenotypes.add(newG); } } diff --git 
a/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index ab81352e2..ae77b840a 100644 --- a/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/gatk-framework/src/test/java/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -1614,7 +1614,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "referenceConfidenceMergeData") public void testReferenceConfidenceMerge(final List toMerge, final GenomeLoc loc, final boolean returnSiteEvenIfMonomorphic, final VariantContext expectedResult) { - final VariantContext result = GATKVariantContextUtils.referenceConfidenceMerge(toMerge, loc, returnSiteEvenIfMonomorphic ? (byte)'A' : null); + final VariantContext result = GATKVariantContextUtils.referenceConfidenceMerge(toMerge, loc, returnSiteEvenIfMonomorphic ? (byte)'A' : null, true); if ( result == null ) { Assert.assertTrue(expectedResult == null); return; From eb463b505dbe5559e8a8da9a870ed096b23ef769 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 6 Feb 2014 16:17:15 -0500 Subject: [PATCH 3/3] Remove a whole bunch of unused annotations from gVCF output. AC,AF,AN,FS,QD - they'll all be recomputed later. BLOCK_SIZE and MIN_GQ were not necessary. I also made the StrandBiasBySample annotation forced on when in gVCF mode. It turns out that its output wasn't compatible with BCF so I patched it (and the variant jar too). 
--- .../sting/gatk/walkers/annotator/FisherStrand.java | 14 +++++++------- .../gatk/walkers/annotator/StrandBiasBySample.java | 3 +-- .../walkers/haplotypecaller/HaplotypeCaller.java | 8 ++++++++ .../sting/utils/gvcf/GVCFWriter.java | 14 ++++++++++---- .../HaplotypeCallerGVCFIntegrationTest.java | 4 ++-- .../variantutils/CombineGVCFsIntegrationTest.java | 7 +------ 6 files changed, 29 insertions(+), 21 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 95be967a2..f3785d63a 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -195,15 +195,15 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat * @param table the table used by the FisherStrand annotation * @return the array used by the per-sample Strand Bias annotation */ - public static int[] getContingencyArray( final int[][] table ) { + public static List getContingencyArray( final int[][] table ) { if(table.length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } if(table[0].length != 2) { throw new IllegalArgumentException("Expecting a 2x2 strand bias table."); } - final int[] array = new int[4]; // TODO - if we ever want to do something clever with multi-allelic sites this will need to change - array[0] = table[0][0]; - array[1] = table[0][1]; - array[2] = table[1][0]; - array[3] = table[1][1]; - return array; + final List list = new ArrayList<>(4); // TODO - if we ever want to do something clever with multi-allelic sites this will need to change + list.add(table[0][0]); + list.add(table[0][1]); + list.add(table[1][0]); + list.add(table[1][1]); + return list; } /** diff --git 
a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java index 4b1e48a36..ec1c1e729 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasBySample.java @@ -50,7 +50,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.variant.variantcontext.Genotype; @@ -67,7 +66,7 @@ import java.util.*; * Date: 8/28/13 */ -public class StrandBiasBySample extends GenotypeAnnotation implements ExperimentalAnnotation { +public class StrandBiasBySample extends GenotypeAnnotation { public final static String STRAND_BIAS_BY_SAMPLE_KEY_NAME = "SB"; diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 6a7c60825..2db37fc03 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -549,6 +549,14 @@ public class HaplotypeCaller extends ActiveRegionWalker, In 
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0; logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output"); + + // also, we don't need to output several of the annotations + annotationsToExclude.add("ChromosomeCounts"); + annotationsToExclude.add("FisherStrand"); + annotationsToExclude.add("QualByDepth"); + + // but we definitely want certain other ones + annotationsToUse.add("StrandBiasBySample"); } if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java index 4eabded4b..aa269779b 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/gvcf/GVCFWriter.java @@ -145,9 +145,11 @@ public class GVCFWriter implements VariantContextWriter { public void writeHeader(VCFHeader header) { if ( header == null ) throw new IllegalArgumentException("header cannot be null"); header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY)); - header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block")); header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block")); - header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block")); + + // These annotations are no longer standard + //header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block")); + //header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, 
"Minimum GQ observed within the GVCF block")); for ( final HomRefBlock partition : GQPartitions ) { header.addMetaDataLine(partition.toVCFHeaderLine()); @@ -225,7 +227,9 @@ public class GVCFWriter implements VariantContextWriter { vcb.attributes(new HashMap(2)); // clear the attributes vcb.stop(block.getStop()); vcb.attribute(VCFConstants.END_KEY, block.getStop()); - vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize()); + + // This annotation is no longer standard + //vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize()); // create the single Genotype with GQ and DP annotations final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef())); @@ -233,9 +237,11 @@ public class GVCFWriter implements VariantContextWriter { gb.GQ(block.getMedianGQ()); gb.DP(block.getMedianDP()); gb.attribute(MIN_DP_FORMAT_FIELD, block.getMinDP()); - gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ()); gb.PL(block.getMinPLs()); + // This annotation is no longer standard + //gb.attribute(MIN_GQ_FORMAT_FIELD, block.getMinGQ()); + return vcb.genotypes(gb.make()).make(); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index c6229cd89..b533d391f 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -67,10 +67,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { // this functionality can be adapted to provide input data for whatever you might want in your data tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, 
"53aa13711a1ceec1453f21c705723f04"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "f0a761c310519133ed4f3ad465d986fc"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "1b5697be7ae90723368677d4d66a440a"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"}); - tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "cf2167a563f86af4df26733e2aa6ced6"}); + tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"}); return tests.toArray(new Object[][]{}); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java index 10d1f206f..04582686d 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -112,8 +112,6 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { Assert.assertEquals(first.getStart(), 69511); Assert.assertEquals(first.getEnd(), 69511); Assert.assertEquals(first.getGenotypes().size(), 2); - Assert.assertTrue(first.getGenotype("NA1").isCalled()); - Assert.assertTrue(first.getGenotype("NA2").isNoCall()); } @Test @@ -131,7 +129,6 @@ public 
class CombineGVCFsIntegrationTest extends WalkerTest { Assert.assertEquals(first.getEnd(), 69635); Assert.assertEquals(first.getNAlleles(), 3); Assert.assertEquals(first.getGenotypes().size(), 2); - Assert.assertTrue(first.getGenotype("NA1").isHet()); } @Test @@ -149,19 +146,17 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { Assert.assertEquals(first.getEnd(), 69776); Assert.assertEquals(first.getNAlleles(), 3); Assert.assertEquals(first.getGenotypes().size(), 2); - Assert.assertTrue(first.getGenotype("NA1").isHet()); final VariantContext second = allVCs.get(1); Assert.assertEquals(second.getStart(), 69773); Assert.assertEquals(second.getEnd(), 69783); Assert.assertEquals(second.getGenotypes().size(), 2); - Assert.assertTrue(second.getGenotype("NA1").isHomRef()); } @Test public void testMD5s() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d90227fd360761d9534b1080b17159dd")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("ad4916ff9ab1479845558ddaaae131a6")); spec.disableShadowBCF(); executeTest("testMD5s", spec); }