From 8072bd9831e0000bf49dbcf6ba689068765789f3 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 8 Aug 2011 12:35:39 -0400 Subject: [PATCH 1/5] Updating resource bundle generation qscript for changeover to git --- .../queue/qscripts/GATKResourcesBundle.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 4f1fe741a..59c00b8cd 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -13,7 +13,7 @@ class GATKResourcesBundle extends QScript { var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar") @Argument(doc="liftOverPerl", required=false) - var liftOverPerl: File = new File("./perl/liftOverVCF.pl") + var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl") @Argument(shortName = "ver", doc="The SVN version of this release", required=true) var VERSION: String = _ @@ -57,11 +57,11 @@ class GATKResourcesBundle extends QScript { //Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name) (inRef.name, outRef.name) match { case ("b37", "hg19") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tohg19.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg19.chain"), inRef, outRef) case ("b37", "hg18") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg18.chain"), inRef, outRef) case ("b37", "b36") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tob36.chain"), inRef, outRef) case _ => return null } } @@ -85,7 +85,7 @@ class GATKResourcesBundle extends QScript { // b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")) hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta")) - exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) + exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta")) refs = List(b37, hg18, exampleFASTA) val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/" @@ -94,7 +94,7 @@ class GATKResourcesBundle extends QScript { addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false)) addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) } def initializeStandardDataFiles() = { @@ -105,7 +105,7 @@ class GATKResourcesBundle extends QScript { b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta")) hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")) b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta")) - exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) + exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta")) refs = List(hg19, b37, hg18, b36, exampleFASTA) addResource(new Resource(b37.file, "", b37, false)) @@ -155,8 +155,8 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt", "refGene", b37, true, false)) - addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false)) - addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false)) + addResource(new Resource("public/chainFiles/hg18tob37.chain", "", hg18, false, false)) + addResource(new Resource("public/chainFiles/b36tob37.chain", "", b36, false, false)) // todo -- chain files? // todo 1000G SNP and indel call sets? @@ -165,7 +165,7 @@ class GATKResourcesBundle extends QScript { // exampleFASTA file // addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) } def createBundleDirectories(dir: File) = { From 197169e47b093e80e357a79f3249e2445c9241b0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 8 Aug 2011 13:34:04 -0400 Subject: [PATCH 3/5] Submitting patch from Larry Singh to make MathUtils compatible with java 1.7 --- .../org/broadinstitute/sting/utils/MathUtils.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) mode change 100755 => 100644 public/java/src/org/broadinstitute/sting/utils/MathUtils.java diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java old mode 100755 new mode 100644 index 36ed506aa..cbe2948aa --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -147,13 +147,13 @@ public class MathUtils { return Math.log10(sum) + maxValue; } - public static double sum(List values) { + public static double sumDoubles(List values) { double s = 0.0; for ( double v : values) s += v; return s; } - public static int sum(List values) { + public static int sumIntegers(List values) { int s = 0; for ( int v : values) s += v; return s; @@ -428,7 +428,7 @@ public class MathUtils { // for precision purposes, we need to add (or really subtract, since they're // all negative) the largest value; also, we need to convert to normal-space. - double maxValue = MathUtils.arrayMax( array ); + double maxValue = MathUtils.arrayMaxDouble( array ); for (int i = 0; i < array.size(); i++) normalized[i] = Math.pow(10, array.get(i) - maxValue); @@ -507,7 +507,7 @@ public class MathUtils { return minI; } - public static int arrayMax(List array) { + public static int arrayMaxInt(List array) { if ( array == null ) throw new IllegalArgumentException("Array cannot be null!"); if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!"); @@ -516,7 +516,7 @@ public class MathUtils { return m; } - public static double arrayMax(List array) { + public static double arrayMaxDouble(List array) { if ( array == null ) throw new IllegalArgumentException("Array cannot be null!"); if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!"); @@ -1274,5 +1274,4 @@ public class MathUtils { public static double log10Factorial (int x) { return log10Gamma(x+1); } - -} \ No newline at end of file +} From d7813db217fc5d0728d7654e985e9ee04669ed5f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 8 Aug 2011 16:25:35 -0400 Subject: [PATCH 5/5] Combine Variants was actually outputting invalid VCFs in cases where it was combining Variant Contexts with different alternate alleles: if any of the genotypes had PLs they were no longer valid/correct. Added a check for such cases (the combined VC has more alleles than an original VC) and strip out the PLs when triggered; added integration test to cover it. I also added the check to Select Variants, although it currently doesn't remove unused alleles so it should never trigger. Is there any reason not to strip out unused alleles after a select? --- .../walkers/variantutils/SelectVariants.java | 4 ++++ .../sting/utils/variantcontext/Genotype.java | 7 +++++++ .../variantcontext/VariantContextUtils.java | 18 ++++++++++++++++++ .../CombineVariantsIntegrationTest.java | 10 ++++++++++ .../SelectVariantsIntegrationTest.java | 12 ++++++++++++ 5 files changed, 51 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 41374a349..b2bce2e59 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -544,6 +544,10 @@ public class SelectVariants extends RodWalker { VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles()); + // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) + if ( vc.getAlleles().size() != sub.getAlleles().size() ) + sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(vc.getGenotypes())); + HashMap attributes = new HashMap(sub.getAttributes()); int depth = 0; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 0b5976c3c..fdf3d97db 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -57,6 +57,13 @@ public class Genotype { return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); } + public static Genotype removePLs(Genotype g) { + Map attrs = new HashMap(g.getAttributes()); + attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); + attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); + } + public static Genotype modifyAlleles(Genotype g, List alleles) { return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 7d10749ee..fa039b42e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -588,6 +588,14 @@ public class VariantContextUtils { } } + // if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate) + for ( VariantContext vc : VCs ) { + if ( vc.alleles.size() != alleles.size() ) { + genotypes = stripPLs(genotypes); + break; + } + } + // take the VC with the maxAC and pull the attributes into a modifiable map if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) { attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes()); @@ -633,6 +641,16 @@ public class VariantContextUtils { return merged; } + public static Map stripPLs(Map genotypes) { + Map newGs = new HashMap(genotypes.size()); + + for ( Map.Entry g : genotypes.entrySet() ) { + newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); + } + + return newGs; + } + public static Map> separateVariantContextsByType(Collection VCs) { HashMap> mappedVCs = new HashMap>(); for ( VariantContext vc : VCs ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 9b152bc71..9d5add172 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -70,6 +70,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } + public void combinePLs(String file1, String file2, String md5) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants -NO_HEADER -o %s -R " + b36KGReference + " -priority v1,v2 -B:v1,VCF " + validationDataLocation + file1 + " -B:v2,VCF " + validationDataLocation + file2, + 1, + Arrays.asList(md5)); + executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + } + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } @@ -78,6 +86,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); } @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); } + @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); } + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index b5f41542e..564400f75 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -63,4 +63,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testConcordance--" + testFile, spec); } + @Test(enabled=false) + public void testRemovePLs() { + String testFile = validationDataLocation + "combine.3.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -sn NA12892 -B:variant,VCF " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("") + ); + + executeTest("testWithPLs--" + testFile, spec); + } }