From 5d19fca6490db8e07a1634074d30094576019859 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Tue, 11 Sep 2012 23:01:00 -0400 Subject: [PATCH 01/11] A couple of bug-fixy changes. 1) SelectVariants could throw a ReviewedStingException (one of the nasty "Bug:") ones if the user requested a sample that wasn't present in the VCF. The walker now checks for this in the initialize() phase, and throws a more informative error if the situation is detected. If the user simply wants to subset the VCF to all the samples requested that are actually present in the VCF, the --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES flag changes this UserException to a Warning, and does the appropriate subsetting. Added integration tests for this. 2) GenotypeLikelihoods has an unsafe method getLog10GQ(GenotypeType), which is completely broken for multi-allelic sites. I marked that method as deprecated, and added methods that use the context of the allele ordering (either directly specified or as a VC) to retrieve the appropriate GQ, and added a unit test to cover this case. VariantsToBinaryPed needs to dynamically calculate the GQ field sometimes (because I have some VCFs with PLs but no GQ). --- .../walkers/variantutils/SelectVariants.java | 29 +++++++++++++++++-- .../variantutils/VariantsToBinaryPed.java | 15 +++++++++- .../variantcontext/GenotypeLikelihoods.java | 27 +++++++++++++++++ .../SelectVariantsIntegrationTest.java | 29 +++++++++++++++++++ .../GenotypeLikelihoodsUnitTest.java | 26 +++++++++++++++++ 5 files changed, 122 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 3d14308b6..7bad19775 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; @@ -325,6 +326,9 @@ public class SelectVariants extends RodWalker implements TreeR @Argument(doc="indel size select",required=false,fullName="maxIndelSize") private int maxIndelSize = Integer.MAX_VALUE; + @Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES") + private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false; + /* Private class used to store the intermediate variants in the integer random selection process */ private static class RandomVariantStructure { @@ -386,10 +390,29 @@ public class SelectVariants extends RodWalker implements TreeR Collection samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); - // first, add any requested samples - samples.addAll(samplesFromFile); - samples.addAll(samplesFromExpressions); + // first, check overlap between requested and present samples + Set commandLineUniqueSamples = new HashSet(samplesFromFile.size()+samplesFromExpressions.size()+sampleNames.size()); + commandLineUniqueSamples.addAll(samplesFromFile); + commandLineUniqueSamples.addAll(samplesFromExpressions); + commandLineUniqueSamples.addAll(sampleNames); + commandLineUniqueSamples.removeAll(vcfSamples); + if ( commandLineUniqueSamples.size() > 0 && ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES ) { + logger.warn("Samples present on command line input that are not present in the VCF. These samples will be ignored."); + samplesFromFile.removeAll(commandLineUniqueSamples); + samplesFromExpressions.retainAll(commandLineUniqueSamples); + } else if (commandLineUniqueSamples.size() > 0 ) { + throw new UserException.BadInput(String.format("%s%n%n%s%n%n%s%n%n%s", + "Samples entered on command line (through -sf or -sn) that are not present in the VCF.", + "A list of these samples:", + Utils.join(",",commandLineUniqueSamples), + "To ignore these samples, run with --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")); + } + + // second, add the requested samples samples.addAll(sampleNames); + samples.addAll(samplesFromExpressions); + samples.addAll(samplesFromFile); + samples.removeAll(commandLineUniqueSamples); // if none were requested, we want all of them if ( samples.isEmpty() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index 2e6a80462..6bc6153df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -7,7 +7,9 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -15,6 +17,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -278,7 +281,7 @@ public class VariantsToBinaryPed extends RodWalker { private byte getFlippedEncoding(Genotype g, int offset) { byte b; - if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) { + if ( ! checkGQIsGood(g) ) { b = NO_CALL; } else if ( g.isHomRef() ) { b = HOM_VAR; @@ -293,6 +296,16 @@ public class VariantsToBinaryPed extends RodWalker { return (byte) (b << (2*offset)); } + private boolean checkGQIsGood(Genotype genotype) { + if ( genotype.hasGQ() ) { + return genotype.getGQ() >= minGenotypeQuality; + } else if ( genotype.hasLikelihoods() ) { + return GenotypeLikelihoods.getGQLog10FromLikelihoods(genotype.getType().ordinal()-1,genotype.getLikelihoods().getAsVector()) >= minGenotypeQuality; + } + + return false; + } + private static String getID(VariantContext v) { if ( v.hasID() ) { return v.getID(); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index 7b4256b70..641eb5449 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Arrays; import java.util.EnumMap; +import java.util.List; public class GenotypeLikelihoods { private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5; @@ -167,10 +168,36 @@ public class GenotypeLikelihoods { //Return the neg log10 Genotype Quality (GQ) for the given genotype //Returns Double.NEGATIVE_INFINITY in case of missing genotype + + /** + * This is really dangerous and returns completely wrong results for genotypes from a multi-allelic context. + * Use getLog10GQ(Genotype,VariantContext) or getLog10GQ(Genotype,List) in place of it. + * + * If you **know** you're biallelic, use getGQLog10FromLikelihoods directly. + * @param genotype - actually a genotype type (no call, hom ref, het, hom var) + * @return an unsafe quantity that could be negative. In the bi-allelic case, the GQ resulting from best minus next best (if the type is the best). + */ + @Deprecated public double getLog10GQ(GenotypeType genotype){ return getGQLog10FromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector()); } + @Requires({"genotypeAlleles != null","genotypeAlleles.size()==2","contextAlleles != null","contextAlleles.size() >= 1"}) + private double getLog10GQ(List genotypeAlleles,List contextAlleles) { + int allele1Index = contextAlleles.indexOf(genotypeAlleles.get(0)); + int allele2Index = contextAlleles.indexOf(genotypeAlleles.get(1)); + int plIndex = calculatePLindex(allele1Index,allele2Index); + return getGQLog10FromLikelihoods(plIndex,getAsVector()); + } + + public double getLog10GQ(Genotype genotype, List vcAlleles ) { + return getLog10GQ(genotype.getAlleles(),vcAlleles); + } + + public double getLog10GQ(Genotype genotype, VariantContext context) { + return getLog10GQ(genotype,context.getAlleles()); + } + public static double getGQLog10FromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){ if(likelihoods == null) return Double.NEGATIVE_INFINITY; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 77e29f87b..ffd9c9b4a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -70,6 +70,20 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testComplexSelection--" + testfile, spec); } + @Test + public void testComplexSelectionWithNonExistingSamples() { + String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; + String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES -sn A -se '[CDH]' -sn Z -sn T -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile), + 1, + Arrays.asList("4386fbb258dcef4437495a37f5a83c53") + ); + spec.disableShadowBCF(); + executeTest("testComplexSelectionWithNonExistingSamples--" + testfile, spec); + } + @Test public void testNonExistingFieldSelection() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; @@ -98,6 +112,21 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testSampleExclusion--" + testfile, spec); } + @Test + public void testSampleInclusionWithNonexistingSamples() { + String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; + String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -sn A -sn Z -sn Q -sf " + samplesFile + " --variant " + testfile, + 1, + UserException.BadInput.class + ); + spec.disableShadowBCF(); + + executeTest("testSampleInclusionWithNonexistingSamples--" + testfile, spec); + } + @Test public void testConcordance() { diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java index 69f42e1f9..4ce32cee7 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java @@ -29,12 +29,15 @@ package org.broadinstitute.sting.utils.variantcontext; // the imports for unit testing. +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.Assert; import org.testng.annotations.Test; +import java.util.Arrays; import java.util.EnumMap; +import java.util.List; /** @@ -44,6 +47,7 @@ public class GenotypeLikelihoodsUnitTest { double [] v = new double[]{-10.5, -1.25, -5.11}; final static String vGLString = "-10.50,-1.25,-5.11"; final static String vPLString = "93,0,39"; + double[] triAllelic = new double[]{-4.2,-2.0,-3.0,-1.6,0.0,-4.0}; //AA,AB,AC,BB,BC,CC @Test public void testFromVector2() { @@ -139,6 +143,28 @@ public class GenotypeLikelihoodsUnitTest { } } + // this test is completely broken, the method is wrong. + public void testGetQualFromLikelihoodsMultiAllelicBroken() { + GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic); + double actualGQ = gl.getLog10GQ(GenotypeType.HET); + double expectedGQ = 1.6; + Assert.assertEquals(actualGQ,expectedGQ); + } + + public void testGetQualFromLikelihoodsMultiAllelic() { + GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic); + Allele ref = Allele.create(BaseUtils.A,true); + Allele alt1 = Allele.create(BaseUtils.C); + Allele alt2 = Allele.create(BaseUtils.T); + List allAlleles = Arrays.asList(ref,alt1,alt2); + List gtAlleles = Arrays.asList(alt1,alt2); + GenotypeBuilder gtBuilder = new GenotypeBuilder(); + gtBuilder.alleles(gtAlleles); + double actualGQ = gl.getLog10GQ(gtBuilder.make(),allAlleles); + double expectedGQ = 1.6; + Assert.assertEquals(actualGQ,expectedGQ); + } + private void assertDoubleArraysAreEqual(double[] v1, double[] v2) { Assert.assertEquals(v1.length, v2.length); for ( int i = 0; i < v1.length; i++ ) { From 91f320453491c3981cb980b617d352121a5b8705 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 11 Sep 2012 16:52:54 -0400 Subject: [PATCH 06/11] VCF/BCF writers once again automatically write out no-call genotypes for samples in the VCFHeader but not in the VC itself -- Turns out this was consuming 30% of the UG runtime, and causing problems elsewhere. -- Removed addMissingSamples from VariantcontextUtils, and calls to it -- Updated VCF / BCF writers to automatically write out a diploid no call for missing samples -- Added unit tests for this behavior in VariantContextWritersUnitTest --- .../genotyper/UnifiedGenotyperEngine.java | 19 +------- .../walkers/variantutils/CombineVariants.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 1 - .../utils/variantcontext/GenotypeBuilder.java | 13 ++++++ .../variantcontext/VariantContextUtils.java | 28 +----------- .../variantcontext/writer/BCF2Writer.java | 9 +++- .../variantcontext/writer/VCFWriter.java | 12 +---- .../VariantContextTestProvider.java | 45 +++++++++++++++++++ .../writer/VariantContextWritersUnitTest.java | 10 +++++ 9 files changed, 79 insertions(+), 60 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index a57c877e0..469d63b8a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -198,23 +197,7 @@ public class UnifiedGenotyperEngine { } } - return addMissingSamples(results, allSamples); - } - - private List addMissingSamples(final List calls, final Set allSamples) { - if ( calls.isEmpty() || allSamples == null ) return calls; - - final List withAllSamples = new ArrayList(calls.size()); - for ( final VariantCallContext call : calls ) { - if ( call == null ) - withAllSamples.add(null); - else { - final VariantContext withoutMissing = VariantContextUtils.addMissingSamples(call, allSamples); - withAllSamples.add(new VariantCallContext(withoutMissing, call.confidentlyCalled, call.shouldEmit)); - } - } - - return withAllSamples; + return results; } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 555999bdb..b1d8dc91d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -313,7 +313,7 @@ public class CombineVariants extends RodWalker implements Tree VariantContextUtils.calculateChromosomeCounts(builder, false); if ( minimalVCF ) VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY)); - vcfWriter.add(VariantContextUtils.addMissingSamples(builder.make(), samples)); + vcfWriter.add(builder.make()); } return vcs.isEmpty() ? 0 : 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 78c9c4a1c..5f80f77a4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -246,7 +246,6 @@ public class VariantsToVCF extends RodWalker { } vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); - vc = VariantContextUtils.addMissingSamples(vc, samples); vcfwriter.add(vc); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index 0ee32fa2e..9337a78f9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -53,6 +53,8 @@ import java.util.*; */ @Invariant({"alleles != null"}) public final class GenotypeBuilder { + private static final List DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + private String sampleName = null; private List alleles = Collections.emptyList(); @@ -90,6 +92,17 @@ public final class GenotypeBuilder { return new GenotypeBuilder(sampleName, alleles).PL(gls).make(); } + /** + * Create a new Genotype object for a sample that's missing from the VC (i.e., in + * the output header). Defaults to a diploid no call genotype ./. + * + * @param sampleName the name of this sample + * @return an initialized Genotype with sampleName that's a diploid ./. no call genotype + */ + public static Genotype createMissing(final String sampleName) { + return new GenotypeBuilder(sampleName).alleles(DIPLOID_NO_CALL).make(); + } + /** * Create a empty builder. Both a sampleName and alleles must be provided * before trying to make a Genotype from this builder. diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index d7e4a7135..8abcf115a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -32,8 +32,8 @@ import org.apache.log4j.Logger; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -47,7 +47,6 @@ public class VariantContextUtils { public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; public final static String MERGE_FILTER_PREFIX = "filterIn"; - private static final List DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private static Set MISSING_KEYS_WARNED_ABOUT = new HashSet(); final public static JexlEngine engine = new JexlEngine(); @@ -60,31 +59,6 @@ public class VariantContextUtils { engine.setDebug(false); } - /** - * Ensures that VC contains all of the samples in allSamples by adding missing samples to - * the resulting VC with default diploid ./. genotypes - * - * @param vc the VariantContext - * @param allSamples all of the samples needed - * @return a new VariantContext with missing samples added - */ - public static VariantContext addMissingSamples(final VariantContext vc, final Set allSamples) { - // TODO -- what's the fastest way to do this calculation? - final Set missingSamples = new HashSet(allSamples); - missingSamples.removeAll(vc.getSampleNames()); - - if ( missingSamples.isEmpty() ) - return vc; - else { - //logger.warn("Adding " + missingSamples.size() + " missing samples to called context"); - final GenotypesContext gc = GenotypesContext.copy(vc.getGenotypes()); - for ( final String missing : missingSamples ) { - gc.add(new GenotypeBuilder(missing).alleles(DIPLOID_NO_CALL).make()); - } - return new VariantContextBuilder(vc).genotypes(gc).make(); - } - } - /** * Update the attributes of the attributes map given the VariantContext to reflect the * proper chromosome-based VCF tags diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index e4c64b26b..a338c7c0d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -32,7 +32,10 @@ import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFContigHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; @@ -345,10 +348,12 @@ class BCF2Writer extends IndexingVariantContextWriter { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); + assert writer != null; + writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); - if ( g == null ) VCFWriter.missingSampleError(vc, header); + if ( g == null ) g = GenotypeBuilder.createMissing(name); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index db74f2263..93b3b603f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -343,9 +342,7 @@ class VCFWriter extends IndexingVariantContextWriter { mWriter.write(VCFConstants.FIELD_SEPARATOR); Genotype g = vc.getGenotype(sample); - if ( g == null ) { - missingSampleError(vc, mHeader); - } + if ( g == null ) g = GenotypeBuilder.createMissing(sample); final List attrs = new ArrayList(genotypeFormatKeys.size()); for ( String field : genotypeFormatKeys ) { @@ -426,13 +423,6 @@ class VCFWriter extends IndexingVariantContextWriter { } } - public static final void missingSampleError(final VariantContext vc, final VCFHeader header) { - final List badSampleNames = new ArrayList(); - for ( final String x : header.getGenotypeSamples() ) - if ( ! vc.hasGenotype(x) ) badSampleNames.add(x); - throw new ReviewedStingException("BUG: we now require all samples in VCFheader to have genotype objects. Missing samples are " + Utils.join(",", badSampleNames)); - } - private boolean isMissingValue(String s) { // we need to deal with the case that it's a list of missing values return (countOccurrences(VCFConstants.MISSING_VALUE_v4.charAt(0), s) + countOccurrences(',', s) == s.length()); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 26e2dbfbc..6785fa816 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -596,6 +596,51 @@ public class VariantContextTestProvider { return TEST_DATAs; } + public static void testReaderWriterWithMissingGenotypes(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException { + final int nSamples = data.header.getNGenotypeSamples(); + if ( nSamples > 2 ) { + for ( final VariantContext vc : data.vcs ) + if ( vc.isSymbolic() ) + // cannot handle symbolic alleles because they may be weird non-call VCFs + return; + + final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension()); + tmpFile.deleteOnExit(); + + // write expected to disk + final EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); + final VariantContextWriter writer = tester.makeWriter(tmpFile, options); + + final Set samplesInVCF = new HashSet(data.header.getGenotypeSamples()); + final List missingSamples = Arrays.asList("MISSING1", "MISSING2"); + final List allSamples = new ArrayList(missingSamples); + allSamples.addAll(samplesInVCF); + + final VCFHeader header = new VCFHeader(data.header.getMetaDataInInputOrder(), allSamples); + writeVCsToFile(writer, header, data.vcs); + + // ensure writing of expected == actual + final Pair> p = readAllVCs(tmpFile, tester.makeCodec()); + final Iterable actual = p.getSecond(); + + int i = 0; + for ( final VariantContext readVC : actual ) { + if ( readVC == null ) continue; // sometimes we read null records... + final VariantContext expected = data.vcs.get(i++); + for ( final Genotype g : readVC.getGenotypes() ) { + Assert.assertTrue(allSamples.contains(g.getSampleName())); + if ( samplesInVCF.contains(g.getSampleName()) ) { + assertEquals(g, expected.getGenotype(g.getSampleName())); + } else { + // missing + Assert.assertTrue(g.isNoCall()); + } + } + } + + } + } + public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException { testReaderWriter(tester, data.header, data.vcs, data.vcs, true); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWritersUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWritersUnitTest.java index 1b791bf6c..adf3eb235 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWritersUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWritersUnitTest.java @@ -82,6 +82,11 @@ public class VariantContextWritersUnitTest extends BaseTest { VariantContextTestProvider.testReaderWriter(new BCFIOTester(), testData); } + @Test(dataProvider = "VariantContextTest_SingleContexts") + public void testBCF2WriterReaderMissingGenotypes(final VariantContextTestProvider.VariantContextTestData testData) throws IOException { + VariantContextTestProvider.testReaderWriterWithMissingGenotypes(new BCFIOTester(), testData); + } + private class BCFIOTester extends VariantContextTestProvider.VariantContextIOTest { @Override public String getExtension() { @@ -110,6 +115,11 @@ public class VariantContextWritersUnitTest extends BaseTest { VariantContextTestProvider.testReaderWriter(new VCFIOTester(), testData); } + @Test(enabled = true, dataProvider = "VariantContextTest_SingleContexts") + public void testVCF4WriterReaderMissingGenotypes(final VariantContextTestProvider.VariantContextTestData testData) throws IOException { + VariantContextTestProvider.testReaderWriterWithMissingGenotypes(new VCFIOTester(), testData); + } + private class VCFIOTester extends VariantContextTestProvider.VariantContextIOTest { @Override public String getExtension() { From d1ba17df5dfb2294f17873e918674627aec30a77 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 12 Sep 2012 06:41:36 -0400 Subject: [PATCH 07/11] Fixed nasty bug in BCF2 writer for case where all genotypes are missing -- Previous code was looking for a -1 result from maxPloidy() but the result as actually 0, so instead of writing a diploid no call we were actually writing "unavailable" genotypes, and failing the BCF == VCF test in integration tests. Fixed. --- .../sting/utils/variantcontext/GenotypesContext.java | 6 +++++- .../sting/utils/variantcontext/writer/BCF2FieldWriter.java | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index ba8668fa9..02ea1a1f2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.utils.variantcontext; import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import java.util.*; @@ -413,6 +412,11 @@ public class GenotypesContext implements List { return getGenotypes().get(i); } + /** + * What is the max ploidy among all samples? Returns 0 if no genotypes are present + * + * @return + */ @Ensures("result >= 0") public int getMaxPloidy() { if ( maxPloidy == -1 ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java index 5b81e7117..497c68c0c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java @@ -275,7 +275,7 @@ public abstract class BCF2FieldWriter { nValuesPerGenotype = vc.getMaxPloidy(); // deal with the case where we have no call everywhere, in which case we write out diploid - if ( nValuesPerGenotype == -1 ) + if ( nValuesPerGenotype == 0 ) nValuesPerGenotype = 2; super.start(encoder, vc); From bfbf1686cd0f71c94dea59c84b6c74c71f0ae1af Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 12 Sep 2012 07:08:03 -0400 Subject: [PATCH 08/11] Fixed nasty bug with defaulting to diploid no-call genotypes -- For the pooled caller we were writing diploid no-calls even when other samples were haploid. Changed maxPloidy function to return a defaultPloidy, rather than 0, in the case where all samples are missing. -- VCF/BCF Writers now create missing genotypes with the ploidy of other samples, or 2 if none are available at all. -- Updating integration tests for general ploidy, as previously we wrote ./. even when other calls were 0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/0/1/1/1/1/1, but now we write ./././././././././././././././././././././././. (ugly but correct) --- .../UnifiedGenotyperGeneralPloidyIntegrationTest.java | 6 +++--- .../sting/utils/codecs/vcf/VCFCompoundHeaderLine.java | 4 ++-- .../sting/utils/variantcontext/GenotypeBuilder.java | 11 +++++++++-- .../sting/utils/variantcontext/GenotypesContext.java | 11 +++++++++-- .../sting/utils/variantcontext/VariantContext.java | 9 +++++---- .../utils/variantcontext/writer/BCF2FieldWriter.java | 6 +----- .../sting/utils/variantcontext/writer/BCF2Writer.java | 2 +- .../sting/utils/variantcontext/writer/VCFWriter.java | 4 +++- 8 files changed, 33 insertions(+), 20 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index e0bf07809..a4a618887 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; import java.util.Arrays; -import org.testng.annotations.Test; /** * Created by IntelliJ IDEA. @@ -52,7 +52,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","90af837f372e3d5143af30bf5c8c2b75"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","567ae6b2a7f839b1307d4087c2f59cca"); } @Test(enabled = true) @@ -62,7 +62,7 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","26598044436c8044f22ffa767b06a0f0"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d2a22e12f1969ae199557947e5039b58"); } @Test(enabled = true) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index 667de3dea..5273806a7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -88,8 +88,8 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF case UNBOUNDED: return -1; case A: return vc.getNAlleles() - 1; case G: - final int ploidy = vc.getMaxPloidy(); - return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy == 0 ? 2 : ploidy); + final int ploidy = vc.getMaxPloidy(2); + return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy); default: throw new ReviewedStingException("Unknown count type: " + countType); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index 9337a78f9..8fd792d3b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -53,6 +53,7 @@ import java.util.*; */ @Invariant({"alleles != null"}) public final class GenotypeBuilder { + private static final List HAPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL); private static final List DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private String sampleName = null; @@ -99,8 +100,14 @@ public final class GenotypeBuilder { * @param sampleName the name of this sample * @return an initialized Genotype with sampleName that's a diploid ./. no call genotype */ - public static Genotype createMissing(final String sampleName) { - return new GenotypeBuilder(sampleName).alleles(DIPLOID_NO_CALL).make(); + public static Genotype createMissing(final String sampleName, final int ploidy) { + final GenotypeBuilder builder = new GenotypeBuilder(sampleName); + switch ( ploidy ) { + case 1: builder.alleles(HAPLOID_NO_CALL); break; + case 2: builder.alleles(DIPLOID_NO_CALL); break; + default: builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); break; + } + return builder.make(); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index 02ea1a1f2..f306bac4d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -413,18 +413,25 @@ public class GenotypesContext implements List { } /** - * What is the max ploidy among all samples? Returns 0 if no genotypes are present + * What is the max ploidy among all samples? Returns defaultPloidy if no genotypes are present * + * @param defaultPloidy the default ploidy, if all samples are no-called * @return */ @Ensures("result >= 0") - public int getMaxPloidy() { + public int getMaxPloidy(final int defaultPloidy) { + if ( defaultPloidy < 0 ) throw new IllegalArgumentException("defaultPloidy must be greater than or equal to 0"); + if ( maxPloidy == -1 ) { maxPloidy = 0; // necessary in the case where there are no genotypes for ( final Genotype g : getGenotypes() ) { maxPloidy = Math.max(g.getPloidy(), maxPloidy); } + + // everything is no called so we return the default ploidy + if ( maxPloidy == 0 ) maxPloidy = defaultPloidy; } + return maxPloidy; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index dd16cf7e1..abac84202 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -642,14 +642,15 @@ public class VariantContext implements Feature { // to enable tribble integratio } /** - * Returns the maximum ploidy of all samples in this VC, or -1 if there are no genotypes + * Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes * * This function is caching, so it's only expensive on the first call * - * @return -1, or the max ploidy + * @param defaultPloidy the default ploidy, if all samples are no-called + * @return default, or the max ploidy */ - public int getMaxPloidy() { - return genotypes.getMaxPloidy(); + public int getMaxPloidy(final int defaultPloidy) { + return genotypes.getMaxPloidy(defaultPloidy); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java index 497c68c0c..61c0129bb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java @@ -272,11 +272,7 @@ public abstract class BCF2FieldWriter { encodingType = BCF2Type.INT8; buildAlleleMap(vc); - nValuesPerGenotype = vc.getMaxPloidy(); - - // deal with the case where we have no call everywhere, in which case we write out diploid - if ( nValuesPerGenotype == 0 ) - nValuesPerGenotype = 2; + nValuesPerGenotype = vc.getMaxPloidy(2); super.start(encoder, vc); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index a338c7c0d..536f07f90 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -353,7 +353,7 @@ class BCF2Writer extends IndexingVariantContextWriter { writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); - if ( g == null ) g = GenotypeBuilder.createMissing(name); + if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 93b3b603f..f5306b6da 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -338,11 +338,13 @@ class VCFWriter extends IndexingVariantContextWriter { */ private void addGenotypeData(VariantContext vc, Map alleleMap, List genotypeFormatKeys) throws IOException { + final int ploidy = vc.getMaxPloidy(2); + for ( String sample : mHeader.getGenotypeSamples() ) { mWriter.write(VCFConstants.FIELD_SEPARATOR); Genotype g = vc.getGenotype(sample); - if ( g == null ) g = GenotypeBuilder.createMissing(sample); + if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy); final List attrs = new ArrayList(genotypeFormatKeys.size()); for ( String field : genotypeFormatKeys ) { From 96be1cbea9eefb1d6bd5797c7979aae84a26d04c Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 12 Sep 2012 10:11:06 -0400 Subject: [PATCH 09/11] My own integration test isn't passing with a clean checkout. This fix to the walker ought to do it. --- .../walkers/variantutils/SelectVariants.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 7bad19775..9664a5bde 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -396,10 +396,17 @@ public class SelectVariants extends RodWalker implements TreeR commandLineUniqueSamples.addAll(samplesFromExpressions); commandLineUniqueSamples.addAll(sampleNames); commandLineUniqueSamples.removeAll(vcfSamples); + + // second, add the requested samples + samples.addAll(sampleNames); + samples.addAll(samplesFromExpressions); + samples.addAll(samplesFromFile); + + logger.debug(Utils.join(",",commandLineUniqueSamples)); + if ( commandLineUniqueSamples.size() > 0 && ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES ) { logger.warn("Samples present on command line input that are not present in the VCF. These samples will be ignored."); - samplesFromFile.removeAll(commandLineUniqueSamples); - samplesFromExpressions.retainAll(commandLineUniqueSamples); + samples.removeAll(commandLineUniqueSamples); } else if (commandLineUniqueSamples.size() > 0 ) { throw new UserException.BadInput(String.format("%s%n%n%s%n%n%s%n%n%s", "Samples entered on command line (through -sf or -sn) that are not present in the VCF.", @@ -408,11 +415,6 @@ public class SelectVariants extends RodWalker implements TreeR "To ignore these samples, run with --ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")); } - // second, add the requested samples - samples.addAll(sampleNames); - samples.addAll(samplesFromExpressions); - samples.addAll(samplesFromFile); - samples.removeAll(commandLineUniqueSamples); // if none were requested, we want all of them if ( samples.isEmpty() ) { From 849a2b883950abc25d8390aecc69de7a5d44c167 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 12 Sep 2012 12:23:00 -0400 Subject: [PATCH 10/11] Adding HC integration test for _structural_ insertions and deletions. --- .../haplotypecaller/HaplotypeCallerIntegrationTest.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index b45c027a7..b4ac2b86d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -73,4 +73,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8d092b25f40456e618eef91fdce8adf0")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } + + @Test + public void HCTestStructuralIndels() { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c29e61810c056b52a47baae0696931ea")); + executeTest("HCTestStructuralIndels: ", spec); + } + }