From 616ff8ea017888945e4dd5dcdde6dc196e9b6bd2 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 6 Jan 2012 10:36:11 -0500 Subject: [PATCH 01/18] fixed typo in help text --- .../sting/gatk/walkers/variantutils/CombineVariants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 096085330..af05c0dc4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -105,7 +105,7 @@ public class CombineVariants extends RodWalker { * and each named argument will be labeled as such in the output (i.e., set=name rather than * set=variants2). The order of arguments does not matter unless except for the naming, so * if you provide an rod priority list and no explicit names than variants, variants2, etc - * are techincally order dependent. It is strongly recommended to provide explicit names when + * are technically order dependent. It is strongly recommended to provide explicit names when * a rod priority list is provided. */ @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) From ae259f81cc46dbb4966b52c1affe73951ea8494d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 17 Jan 2012 14:39:27 -0500 Subject: [PATCH 02/18] Bug fixing for merging of read fragments when one fragment contained an indel --- .../variantrecalibration/VariantRecalibratorEngine.java | 3 +-- .../java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java index 6d2ac643b..378765051 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.List; @@ -126,7 +125,7 @@ public class VariantRecalibratorEngine { iteration++; model.maximizationStep( data ); currentChangeInMixtureCoefficients = model.normalizePMixtureLog10(); - model.expectationStep(data); + model.expectationStep( data ); if( iteration % 5 == 0 ) { // cut down on the number of output lines so that users can read the warning messages logger.info("Finished iteration " + iteration + ". \tCurrent change in mixture coefficients = " + String.format("%.5f", currentChangeInMixtureCoefficients)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index cc0b1ae67..d1e3ce26b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -608,7 +608,7 @@ public class ReadUtils { * Example: Locus => {read1, read2, ..., readN} * * - Second: readToLocusMap -- a HashMap that describes for each read what loci it contributes to the coverage. - * Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with true meaning it contributes to the coverage. + * Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with value==true meaning it contributes to the coverage. * Example: Read => {true, true, false, ... false} * * @param readList the list of reads to generate the association mappings From 8b0ddf0aaf5462986e405de2b966f146ac00a880 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 17 Jan 2012 16:13:13 -0500 Subject: [PATCH 03/18] Adding notes to CountCovariates docs about using interval lists as database of known variation --- .../gatk/walkers/recalibration/CountCovariatesWalker.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 88a9668cc..bdf25419f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -155,7 +155,9 @@ public class CountCovariatesWalker extends LocusWalker> knownSites = Collections.emptyList(); From 2eb45340e1b198cb58b581a63e9b4b49e29a19a1 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 18 Jan 2012 20:54:10 -0500 Subject: [PATCH 04/18] Initial, raw, mostly untested version of new pool caller that also does allele discovery. Still needs debugging/refining. Main modification is that there is a new operation mode, set by argument -ALLELE_DISCOVERY_MODE, which if true will determine optimal alt allele at each computable site and will compute AC distribution on it. Current implementation is not working yet if there's more than one pool and it will only output biallelic sites, no functionality for true multi-allelics yet --- .../broadinstitute/sting/utils/MathUtils.java | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 5ffd634cc..5c952b13a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -1557,4 +1557,48 @@ public class MathUtils { } return shuffled; } + + /** + * Vector operations + */ + public static double[] vectorSum(double v1[], double v2[]) { + if (v1.length != v2.length) + throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); + + double result[] = new double[v1.length]; + for (int k=0; k < v1.length; k++) + result[k] = v1[k]+v2[k]; + + return result; + } + + public static double[] scalarTimesIntVector(double a, int[] v1) { + + double result[] = new double[v1.length]; + for (int k=0; k < v1.length; k++) + result[k] = a*v1[k]; + + return result; + } + + public static double dotProduct(double v1[], double v2[]) { + if (v1.length != v2.length) + throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); + + double result = 0.0; + for (int k=0; k < v1.length; k++) + result += v1[k]*v2[k]; + + return result; + + } + + public static double[] vectorLog10(double v1[]) { + double result[] = new double[v1.length]; + for (int k=0; k < v1.length; k++) + result[k] = Math.log10(v1[k]); + + return result; + + } } From ab8f499bc32982d000d5b9e291659d56927d9bfc Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 18 Jan 2012 22:04:51 -0500 Subject: [PATCH 06/18] Annotate with FS even for filtered sites --- .../sting/gatk/walkers/annotator/FisherStrand.java | 2 +- .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index c4025a25c..7d728dd5e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -54,7 +54,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private static final double MIN_PVALUE = 1E-320; public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( ! vc.isVariant() || vc.isFiltered() ) + if ( !vc.isVariant() ) return null; int[][] table; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 32fa8679e..5cdf12f1b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -44,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("43e7a17d95b1a0cf72e669657794d802")); + Arrays.asList("1899bdb956c62bbcbf160b18cd3aea60")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("36ce53ae4319718ad9c8ae391deebc8c")); + Arrays.asList("320f61c87253aba77d6dc782242cba8b")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } From cf9b1d350a373fde820b4d97b2df3f321646601a Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Thu, 19 Jan 2012 00:20:49 -0500 Subject: [PATCH 07/18] Some minor changes to in-process functions that nobody else uses. CGL now properly ignores no-calls for external VCFs. --- .../sting/queue/library/ipf/vcf/VCFExtractIntervals.scala | 1 + .../sting/queue/library/ipf/vcf/VCFExtractSamples.scala | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala index 3935c2138..e661f1bb5 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala @@ -44,6 +44,7 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e } } } + out.printf("%s%n",cur) out.close } diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala index 54e541142..3179c327f 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala @@ -6,7 +6,7 @@ import collection.JavaConversions._ import org.broadinstitute.sting.commandline._ import java.io.{PrintWriter, PrintStream, File} -class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction { +class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction { def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList) @Input(doc="VCF from which to extract samples") var inputVCF : File = inVCF From 9946853039d2370d043d284f61356e9eaf66be1f Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Thu, 19 Jan 2012 00:25:22 -0500 Subject: [PATCH 08/18] Remove duplicated line --- .../sting/queue/library/ipf/vcf/VCFExtractIntervals.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala index e661f1bb5..a7b324a2b 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala @@ -39,8 +39,6 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e if ( elems.hasNext ) { prev = cur cur = elems.next - } else { - out.printf("%s%n",cur) } } } From 1e037a0ecf0c40b6ed472dbb5786825e8e9882d2 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Thu, 19 Jan 2012 00:33:08 -0500 Subject: [PATCH 09/18] Ensure second-to-last line printed --- .../sting/queue/library/ipf/vcf/VCFExtractIntervals.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala index a7b324a2b..03d31a217 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala @@ -42,6 +42,7 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e } } } + out.printf("%s%n",prev) out.printf("%s%n",cur) out.close From 39e6df5aa9d1008a7318fa900137b0c5efa5546d Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Thu, 19 Jan 2012 00:51:28 -0500 Subject: [PATCH 10/18] Fix edge case for very small VCFs --- .../library/ipf/vcf/VCFExtractIntervals.scala | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala index 03d31a217..3c7cd0a2d 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala @@ -26,24 +26,24 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e var cur : String = null if ( elems.hasNext ) { cur = elems.next + while ( elems.hasNext ) { + out.printf("%s%n",prev) + while ( cur.equals(prev) && elems.hasNext && !cur.equals("") ) { + cur = elems.next + } + + if ( ! cur.equals(prev) ) { + if ( elems.hasNext ) { + prev = cur + cur = elems.next + } + } + } + out.printf("%s%n",prev) + out.printf("%s%n",cur) } else { out.printf("%s%n",prev) } - while ( elems.hasNext ) { - out.printf("%s%n",prev) - while ( cur.equals(prev) && elems.hasNext && !cur.equals("") ) { - cur = elems.next - } - - if ( ! cur.equals(prev) ) { - if ( elems.hasNext ) { - prev = cur - cur = elems.next - } - } - } - out.printf("%s%n",prev) - out.printf("%s%n",cur) out.close } From 7f3ad25b0109e91a82cf41378c33830f169fb705 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Thu, 19 Jan 2012 10:54:48 -0500 Subject: [PATCH 13/18] Adding a mode to VariantFiltration to invalidate previously-applied filters to allow complete re-filtering of a VCF. T2D VQSR: re-calling now done with appropriate quality settings and using BAQ. --- .../gatk/walkers/filters/VariantFiltrationWalker.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 8278dbab7..7eb6fad54 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -139,6 +139,12 @@ public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, missing values should be considered failing the expression", required=false) protected Boolean FAIL_MISSING_VALUES = false; + /** + * Invalidate previous filters applied to the VariantContext, applying only the filters here + */ + @Argument(fullName="invalidatePreviousFilters",doc="Remove previous filters applied to the VCF",required=false) + boolean invalidatePrevious = false; + // JEXL expressions for the filters List filterExps; List genotypeFilterExps; @@ -215,6 +221,9 @@ public class VariantFiltrationWalker extends RodWalker { for ( VariantContext vc : VCs ) { + if ( invalidatePrevious ) { + vc = (new VariantContextBuilder(vc)).filters(new HashSet()).make(); + } // filter based on previous mask position if ( previousMaskPosition != null && // we saw a previous mask site previousMaskPosition.getContig().equals(vc.getChr()) && // it's on the same contig From ced6775de3af5b5df967ea96ada5c73300aa3bc1 Mon Sep 17 00:00:00 2001 From: Aaron McKenna Date: Wed, 18 Jan 2012 16:34:42 -0500 Subject: [PATCH 15/18] Changes to allow for external tests Changes to the build script that allow the external directory to have tests. This means groups like CGA don't have to reinvent the wheel on testing, and can instead use the GATKs unit and integration tests. Signed-off-by: David Roazen --- build.xml | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/build.xml b/build.xml index dbdafa3d9..ffb564cd2 100644 --- a/build.xml +++ b/build.xml @@ -281,6 +281,10 @@ + + + + @@ -331,7 +335,7 @@ - + @@ -761,6 +765,7 @@ + @@ -811,7 +816,23 @@ - + + + + + + + + + + + + + + + + + @@ -852,6 +873,7 @@ + @@ -934,6 +956,9 @@ + + + From 066da80a3d9a83ec5e59cd3e545c6c2bf102a625 Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Thu, 19 Jan 2012 18:19:58 -0500 Subject: [PATCH 17/18] Added KEEP_UNCONDTIONAL option which permits even sites with only filtered records to be included as unfiltered sites in the output --- .../sting/utils/variantcontext/VariantContextUtils.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index c9a4965c1..39045ea21 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -464,7 +464,11 @@ public class VariantContextUtils { /** * Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this. */ - KEEP_IF_ALL_UNFILTERED + KEEP_IF_ALL_UNFILTERED, + /** + * If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset. + */ + KEEP_UNCONDITIONAL } /** @@ -635,7 +639,7 @@ public class VariantContextUtils { } // if at least one record was unfiltered and we want a union, clear all of the filters - if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) + if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL ) filters.clear();