From 7506994d09c4dfb4a66fb23b76f8d8a1aba0714b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 11 Jun 2012 12:43:11 -0400 Subject: [PATCH] Nearing final BCF commit -- Cleanup some (but not all) VCF3 files. Turns out there are lots so... -- Refactored genotype parser from VCFCodec and VCF3Codec into a single shared version in AbstractVCFCodec. Now VCF3 properly handles the new GenotypeBuilder interface -- Misc. bugfixes in GenotypeBuilder --- .../utils/codecs/vcf/AbstractVCFCodec.java | 124 ++++++++++++++++-- .../sting/utils/codecs/vcf/VCF3Codec.java | 111 ++++------------ .../sting/utils/codecs/vcf/VCFCodec.java | 103 --------------- .../utils/variantcontext/GenotypeBuilder.java | 2 +- .../commandline/ParsingEngineUnitTest.java | 17 --- .../gatk/EngineFeaturesIntegrationTest.java | 4 - .../VariantEvalIntegrationTest.java | 2 +- .../LiftoverVariantsIntegrationTest.java | 4 +- .../SelectVariantsIntegrationTest.java | 4 +- .../VCFStreamingIntegrationTest.java | 10 +- 10 files changed, 147 insertions(+), 234 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 43cc5de14..026b2ea15 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -28,6 +28,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec // we have to store the list of strings that make up the header until they're needed protected VCFHeader header = null; + protected VCFHeaderVersion version = null; // a mapping of the allele protected Map> alleleMap = new HashMap>(3); @@ -91,18 +92,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec */ public abstract Object readHeader(LineReader reader); - /** - * create a genotype map - * - * @param str the string - * @param alleles the list of alleles - * @param chr 
chrom - * @param pos position - * @return a mapping of sample name to genotype object - */ - public abstract LazyGenotypesContext.LazyData createGenotypeMap(String str, List alleles, String chr, int pos); - - /** * parse the filter string, first checking to see if we already have parsed it in a previous attempt * @param filterString the string to parse @@ -117,6 +106,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec * @return a VCFHeader object */ protected VCFHeader parseHeaderFromLines( final List headerStrings, final VCFHeaderVersion version ) { + this.version = version; + Set metaData = new TreeSet(); Set sampleNames = new LinkedHashSet(); int contigCounter = 0; @@ -721,4 +712,113 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec try { stream.close(); } catch ( IOException e ) {} } } + + + /** + * create a genotype map + * + * @param str the string + * @param alleles the list of alleles + * @return a mapping of sample name to genotype object + */ + public LazyGenotypesContext.LazyData createGenotypeMap(final String str, + final List alleles, + final String chr, + final int pos) { + if (genotypeParts == null) + genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS]; + + int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR); + if ( nParts != genotypeParts.length ) + generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo); + + ArrayList genotypes = new ArrayList(nParts); + + // get the format keys + int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); + + // cycle through the sample names + Iterator sampleNameIterator = header.getGenotypeSamples().iterator(); + + // clear out our allele mapping + alleleMap.clear(); + + // cycle through the genotype strings + for (int genotypeOffset = 1; genotypeOffset < 
nParts; genotypeOffset++) { + int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); + + final String sampleName = sampleNameIterator.next(); + final GenotypeBuilder gb = new GenotypeBuilder(sampleName); + + // check to see if the value list is longer than the key list, which is a problem + if (nGTKeys < GTValueSplitSize) + generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]); + + int genotypeAlleleLocation = -1; + if (nGTKeys >= 1) { + gb.maxAttributes(nGTKeys - 1); + + for (int i = 0; i < nGTKeys; i++) { + final String gtKey = genotypeKeyArray[i]; + boolean missing = i >= GTValueSplitSize; + + // todo -- all of these on the fly parsing of the missing value should be static constants + if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) { + genotypeAlleleLocation = i; + } else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) { + final List filters = parseFilters(getCachedString(GTValueArray[i])); + if ( filters != null ) gb.filters(filters); + } else if ( missing ) { + // if its truly missing (there no provided value) skip adding it to the attributes + } else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) { + // don't add missing values to the map + } else { + if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { + if ( GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) ) + gb.noGQ(); + else + gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i]))); + } else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) { + gb.AD(decodeInts(GTValueArray[i])); + } else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) { + gb.PL(decodeInts(GTValueArray[i])); + } else if (gtKey.equals(VCFConstants.DEPTH_KEY)) { + gb.DP(Integer.valueOf(GTValueArray[i])); + } else { + gb.attribute(gtKey, GTValueArray[i]); + } + } + } + } + + // check to make sure we found a genotype field if our 
version is less than 4.1 file + if ( version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1 ) + generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0"); + if ( genotypeAlleleLocation > 0 ) + generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present"); + + final List GTalleles = (genotypeAlleleLocation == -1 ? new ArrayList(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); + gb.alleles(GTalleles); + gb.phased(genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1); + + // add it to the list + try { + genotypes.add(gb.make()); + } catch (TribbleException e) { + throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); + } + } + + return new LazyGenotypesContext.LazyData(genotypes, header.getSampleNamesInOrder(), header.getSampleNameToOffset()); + } + + + private final static String[] INT_DECODE_ARRAY = new String[10000]; + private final static int[] decodeInts(final String string) { + final int nValues = ParsingUtils.split(string, INT_DECODE_ARRAY, ','); + final int[] values = new int[nValues]; + for ( int i = 0; i < nValues; i++ ) + values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]); + return values; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index 1489ac6b8..2c103b473 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without 
+ * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.utils.codecs.vcf; import org.broad.tribble.TribbleException; @@ -108,93 +132,6 @@ public class VCF3Codec extends AbstractVCFCodec { return fFields; } - /** - * create a genotype map - * - * @param str the string - * @param alleles the list of alleles - * @param chr chrom - * @param pos position - * @return a mapping of sample name to genotype object - */ - public LazyGenotypesContext.LazyData createGenotypeMap(String str, List alleles, String chr, int pos) { - if (genotypeParts == null) - genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS]; - - int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR); - if ( nParts != genotypeParts.length ) - generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo); - - ArrayList genotypes = new ArrayList(nParts); - - // get the format keys - int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, 
VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); - - // cycle through the sample names - Iterator sampleNameIterator = header.getGenotypeSamples().iterator(); - - // clear out our allele mapping - alleleMap.clear(); - - // cycle through the genotype strings - for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) { - int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); - - double GTQual = VariantContext.NO_LOG10_PERROR; - List genotypeFilters = null; - Map gtAttributes = null; - String sampleName = sampleNameIterator.next(); - - // check to see if the value list is longer than the key list, which is a problem - if (nGTKeys < GTValueSplitSize) - generateException("There are too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]); - - int genotypeAlleleLocation = -1; - if (nGTKeys >= 1) { - gtAttributes = new HashMap(nGTKeys - 1); - - for (int i = 0; i < nGTKeys; i++) { - final String gtKey = new String(genotypeKeyArray[i]); - boolean missing = i >= GTValueSplitSize; - - if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) { - genotypeAlleleLocation = i; - } else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { - GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]); - } else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) { - genotypeFilters = missing ? 
parseFilters(VCFConstants.MISSING_VALUE_v4) : parseFilters(getCachedString(GTValueArray[i])); - } else if ( missing || GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) ) { - //gtAttributes.put(gtKey, VCFConstants.MISSING_VALUE_v4); - } else { - gtAttributes.put(gtKey, new String(GTValueArray[i])); - } - } - } - - // check to make sure we found a genotype field - if ( genotypeAlleleLocation < 0 ) - generateException("Unable to find the GT field for the record; the GT field is required"); - if ( genotypeAlleleLocation > 0 ) - generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes"); - - boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1; - - // add it to the list - try { - final GenotypeBuilder gb = new GenotypeBuilder(sampleName); - gb.alleles(parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); - gb.log10PError(GTQual); - if ( genotypeFilters != null ) gb.filters(genotypeFilters); - gb.attributes(gtAttributes).phased(phased); - genotypes.add(gb.make()); - } catch (TribbleException e) { - throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); - } - } - - return new LazyGenotypesContext.LazyData(genotypes, header.getSampleNamesInOrder(), header.getSampleNameToOffset()); - } - @Override public boolean canDecode(final String potentialInput) { return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 0f07937da..42f9d7d2d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -48,7 +48,6 @@ import java.util.*; public class VCFCodec extends AbstractVCFCodec { // Our aim is to read in the records and convert 
to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4"; - private VCFHeaderVersion version = null; /** * A VCF header the contains master info/filter/format records that we use to 'fill in' @@ -156,108 +155,6 @@ public class VCFCodec extends AbstractVCFCodec { return fFields; } - - /** - * create a genotype map - * - * @param str the string - * @param alleles the list of alleles - * @return a mapping of sample name to genotype object - */ - public LazyGenotypesContext.LazyData createGenotypeMap(String str, List alleles, String chr, int pos) { - if (genotypeParts == null) - genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS]; - - int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR); - if ( nParts != genotypeParts.length ) - generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo); - - ArrayList genotypes = new ArrayList(nParts); - - // get the format keys - int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); - - // cycle through the sample names - Iterator sampleNameIterator = header.getGenotypeSamples().iterator(); - - // clear out our allele mapping - alleleMap.clear(); - - // cycle through the genotype strings - for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) { - int GTValueSplitSize = ParsingUtils.split(genotypeParts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); - - final String sampleName = sampleNameIterator.next(); - final GenotypeBuilder gb = new GenotypeBuilder(sampleName); - - // check to see if the value list is longer than the key list, which is a problem - if (nGTKeys < GTValueSplitSize) - generateException("There are 
too many keys for the sample " + sampleName + ", keys = " + parts[8] + ", values = " + parts[genotypeOffset]); - - int genotypeAlleleLocation = -1; - if (nGTKeys >= 1) { - gb.maxAttributes(nGTKeys - 1); - - for (int i = 0; i < nGTKeys; i++) { - final String gtKey = genotypeKeyArray[i]; - boolean missing = i >= GTValueSplitSize; - - // todo -- all of these on the fly parsing of the missing value should be static constants - if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) { - genotypeAlleleLocation = i; - } else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) { - final List filters = parseFilters(getCachedString(GTValueArray[i])); - if ( filters != null ) gb.filters(filters); - } else if ( missing ) { - // if its truly missing (there no provided value) skip adding it to the attributes - } else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) { - // don't add missing values to the map - } else { - if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { - gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i]))); - } else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) { - gb.AD(decodeInts(GTValueArray[i])); - } else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) { - gb.PL(decodeInts(GTValueArray[i])); - } else if (gtKey.equals(VCFConstants.DEPTH_KEY)) { - gb.DP(Integer.valueOf(GTValueArray[i])); - } else { - gb.attribute(gtKey, GTValueArray[i]); - } - } - } - } - - // check to make sure we found a genotype field if we are a VCF4.0 file - if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 ) - generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0"); - if ( genotypeAlleleLocation > 0 ) - generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present"); - - final List GTalleles = (genotypeAlleleLocation == -1 ? 
new ArrayList(0) : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); - gb.alleles(GTalleles); - gb.phased(genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1); - - // add it to the list - try { - genotypes.add(gb.make()); - } catch (TribbleException e) { - throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); - } - } - - return new LazyGenotypesContext.LazyData(genotypes, header.getSampleNamesInOrder(), header.getSampleNameToOffset()); - } - - private final static String[] INT_DECODE_ARRAY = new String[10000]; - private final static int[] decodeInts(final String string) { - final int nValues = ParsingUtils.split(string, INT_DECODE_ARRAY, ','); - final int[] values = new int[nValues]; - for ( int i = 0; i < nValues; i++ ) - values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]); - return values; - } - @Override public boolean canDecode(final String potentialInput) { return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index 37e7d5d58..15b1b386a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -53,7 +53,7 @@ public final class GenotypeBuilder { public static boolean MAKE_FAST_BY_DEFAULT = true; private String sampleName = null; - private List alleles = null; + private List alleles = Collections.emptyList(); private boolean isPhased = false; private int GQ = -1; diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 87f0e6ff0..8a461d25b 100755 --- 
a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -764,23 +764,6 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } - @Test - public void variantContextBindingArgumentTestVCF3() { - final String[] commandLine = new String[] {"-V:vcf3",NON_EXISTANT_FILENAME_VCF}; - - parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); - parsingEngine.parse( commandLine ); - parsingEngine.validate(); - - VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); - parsingEngine.loadArgumentsIntoObject( argProvider ); - - Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); - Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value"); - Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); - Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); - } - private class ListRodBindingArgProvider { @Input(fullName = "binding", shortName="V", required=false) public List> bindings; diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 68bd28d7a..01af55ca3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -47,10 +47,6 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { testBadRODBindingInput("beagle", "BEAGLE input to VCF expecting walker", UserException.BadArgumentValue.class); } - @Test() private void 
testBadRODBindingInputType2() { - testBadRODBindingInput("vcf3", "VCF3 input to VCF expecting walker", UserException.class); - } - @Test() private void testBadRODBindingInputType3() { testBadRODBindingInput("bed", "Bed input to VCF expecting walker", UserException.BadArgumentValue.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index bdd028414..c1dd8b18b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -302,7 +302,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String tests = cmdRoot + " --dbsnp " + b36dbSNP129 + " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + - " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + " --comp:comp_genotypes " + testDir + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", 1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c")); executeTestParallel("testSelect1", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index cc9a1c5b7..4a1af4e92 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -38,7 +38,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testb36Tohg19() { WalkerTestSpec spec = new 
WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd")); executeTest("test b36 to hg19", spec); @@ -47,7 +47,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testb36Tohg19UnsortedSamples() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010")); executeTest("test b36 to hg19, unsorted samples", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 92a9f3739..6d5221ea5 100755 --- 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -160,7 +160,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testNoGTs--" + testFile, spec); } - @Test + @Test(enabled = false) public void testParallelization2() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; @@ -175,7 +175,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testParallelization (2 threads)--" + testfile, spec); } - @Test + @Test(enabled = false) public void testParallelization4() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index 88aa7c29a..bbfbf9c1e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -46,7 +46,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { // Copy VCF data from the test file into the FIFO. 
- String testFile = validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + String testFile = testDir + "yri.trio.gatk.ug.head.vcf"; FileInputStream inputStream = new FileInputStream(testFile); FileOutputStream outputStream = new FileOutputStream(tmpFifo); outputStream.getChannel().transferFrom(inputStream.getChannel(),0,inputStream.getChannel().size()); @@ -56,7 +56,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants" + " -R " + b36KGReference + - " --variant:vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() + + " --variant,storage=STREAM " + tmpFifo.getAbsolutePath() + " --no_cmdline_in_header " + " -o %s", 1, @@ -74,13 +74,13 @@ public class VCFStreamingIntegrationTest extends WalkerTest { File tmpFifo = File.createTempFile("vcfstreaming",""); Runtime.getRuntime().exec(new String[] {"mkfifo",tmpFifo.getAbsolutePath()}); - String testFile = validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + String testFile = testDir + "yri.trio.gatk.ug.head.vcf"; // Output select to FIFO WalkerTestSpec selectTestSpec = new WalkerTestSpec( "-T SelectVariants" + " -R " + b36KGReference + - " --variant:vcf3,storage=STREAM " + testFile + + " --variant,storage=STREAM " + testFile + " --no_cmdline_in_header" + " -select 'QD > 2.0'" + " -o " + tmpFifo.getAbsolutePath(), @@ -93,7 +93,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { selectTestSpec = new WalkerTestSpec( "-T VariantEval" + " -R " + b36KGReference + - " --eval:vcf3 " + testFile + + " --eval " + testFile + " --comp:vcf,storage=STREAM " + tmpFifo.getAbsolutePath() + " -EV CompOverlap -noEV -noST" + " -o %s",