diff --git a/build.xml b/build.xml index dbdafa3d9..ffb564cd2 100644 --- a/build.xml +++ b/build.xml @@ -281,6 +281,10 @@ + + + + @@ -331,7 +335,7 @@ - + @@ -761,6 +765,7 @@ + @@ -811,7 +816,23 @@ - + + + + + + + + + + + + + + + + + @@ -852,6 +873,7 @@ + @@ -934,6 +956,9 @@ + + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index cb9bc103f..0dda02421 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -54,7 +54,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private static final double MIN_PVALUE = 1E-320; public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( ! vc.isVariant() || vc.isFiltered() ) + if ( !vc.isVariant() ) return null; int[][] table; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 8278dbab7..7eb6fad54 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -139,6 +139,12 @@ public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, missing values should be considered failing the expression", required=false) protected Boolean FAIL_MISSING_VALUES = false; + /** + * Invalidate previous filters applied to the VariantContext, applying only the filters here + */ + @Argument(fullName="invalidatePreviousFilters",doc="Remove previous filters applied to the VCF",required=false) + boolean invalidatePrevious = false; + // JEXL expressions for the filters List filterExps; List genotypeFilterExps; @@ -215,6 +221,9 @@ public class VariantFiltrationWalker extends RodWalker { for ( VariantContext vc : VCs ) { + if ( invalidatePrevious ) { + vc = (new VariantContextBuilder(vc)).filters(new HashSet()).make(); + } // filter based on previous mask position if ( previousMaskPosition != null && // we saw a previous mask site previousMaskPosition.getContig().equals(vc.getChr()) && // it's on the same contig diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 88a9668cc..bdf25419f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -155,7 +155,9 @@ public class CountCovariatesWalker extends LocusWalker> knownSites = Collections.emptyList(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java index 6d2ac643b..378765051 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.List; @@ -126,7 +125,7 @@ public class VariantRecalibratorEngine { iteration++; model.maximizationStep( data ); currentChangeInMixtureCoefficients = model.normalizePMixtureLog10(); - model.expectationStep(data); + model.expectationStep( data ); if( iteration % 5 == 0 ) { // cut down on the number of output lines so that users can read the warning messages logger.info("Finished iteration " + iteration + ". \tCurrent change in mixture coefficients = " + String.format("%.5f", currentChangeInMixtureCoefficients)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 096085330..af05c0dc4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -105,7 +105,7 @@ public class CombineVariants extends RodWalker { * and each named argument will be labeled as such in the output (i.e., set=name rather than * set=variants2). The order of arguments does not matter unless except for the naming, so * if you provide an rod priority list and no explicit names than variants, variants2, etc - * are techincally order dependent. It is strongly recommended to provide explicit names when + * are technically order dependent. It is strongly recommended to provide explicit names when * a rod priority list is provided. */ @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 408faf61a..2da54ca42 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -1569,4 +1569,48 @@ public class MathUtils { } return shuffled; } + + /** + * Vector operations + */ + public static double[] vectorSum(double v1[], double v2[]) { + if (v1.length != v2.length) + throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); + + double result[] = new double[v1.length]; + for (int k=0; k < v1.length; k++) + result[k] = v1[k]+v2[k]; + + return result; + } + + public static double[] scalarTimesIntVector(double a, int[] v1) { + + double result[] = new double[v1.length]; + for (int k=0; k < v1.length; k++) + result[k] = a*v1[k]; + + return result; + } + + public static double dotProduct(double v1[], double v2[]) { + if (v1.length != v2.length) + throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); + + double result = 0.0; + for (int k=0; k < v1.length; k++) + result += v1[k]*v2[k]; + + return result; + + } + + public static double[] vectorLog10(double v1[]) { + double result[] = new double[v1.length]; + for (int k=0; k < v1.length; k++) + result[k] = Math.log10(v1[k]); + + return result; + + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index cc0b1ae67..d1e3ce26b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -608,7 +608,7 @@ public class ReadUtils { * Example: Locus => {read1, read2, ..., readN} * * - Second: readToLocusMap -- a HashMap that describes for each read what loci it contributes to the coverage. - * Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with true meaning it contributes to the coverage. + * Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with value==true meaning it contributes to the coverage. * Example: Read => {true, true, false, ... false} * * @param readList the list of reads to generate the association mappings diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index c9a4965c1..39045ea21 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -464,7 +464,11 @@ public class VariantContextUtils { /** * Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this. */ - KEEP_IF_ALL_UNFILTERED + KEEP_IF_ALL_UNFILTERED, + /** + * If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset. + */ + KEEP_UNCONDITIONAL } /** @@ -635,7 +639,7 @@ public class VariantContextUtils { } // if at least one record was unfiltered and we want a union, clear all of the filters - if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) + if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL ) filters.clear(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 32fa8679e..5cdf12f1b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -44,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("43e7a17d95b1a0cf72e669657794d802")); + Arrays.asList("1899bdb956c62bbcbf160b18cd3aea60")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("36ce53ae4319718ad9c8ae391deebc8c")); + Arrays.asList("320f61c87253aba77d6dc782242cba8b")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala index 3935c2138..3c7cd0a2d 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala @@ -26,23 +26,23 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e var cur : String = null if ( elems.hasNext ) { cur = elems.next - } else { - out.printf("%s%n",prev) - } - while ( elems.hasNext ) { - out.printf("%s%n",prev) - while ( cur.equals(prev) && elems.hasNext && !cur.equals("") ) { - cur = elems.next - } - - if ( ! cur.equals(prev) ) { - if ( elems.hasNext ) { - prev = cur + while ( elems.hasNext ) { + out.printf("%s%n",prev) + while ( cur.equals(prev) && elems.hasNext && !cur.equals("") ) { cur = elems.next - } else { - out.printf("%s%n",cur) + } + + if ( ! cur.equals(prev) ) { + if ( elems.hasNext ) { + prev = cur + cur = elems.next + } } } + out.printf("%s%n",prev) + out.printf("%s%n",cur) + } else { + out.printf("%s%n",prev) } out.close diff --git a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala index 54e541142..3179c327f 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractSamples.scala @@ -6,7 +6,7 @@ import collection.JavaConversions._ import org.broadinstitute.sting.commandline._ import java.io.{PrintWriter, PrintStream, File} -class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction { +class VCFExtractSamples(inVCF: File, outVCF: File, samples: List[String]) extends InProcessFunction { def this(in: File, out: File, samples: File) = this(in,out, (new XReadLines(samples)).readLines.toList) @Input(doc="VCF from which to extract samples") var inputVCF : File = inVCF