From f603d4c48c3448c57793ccfc51eeec721a5fb7ec Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 6 Jul 2012 11:48:04 -0400 Subject: [PATCH 01/14] Fixing PairHMMIndelErrorModel boundary issue When checking the limits of a read to clip, it wasn't considering reads that may have already been clipped before. --- .../walkers/indels/PairHMMIndelErrorModel.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 3ac09d2a7..f49e78469 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -198,21 +198,19 @@ public class PairHMMIndelErrorModel { } } else { + final int refWindowStart = ref.getWindow().getStart(); + final int refWindowStop = ref.getWindow().getStop(); + if (DEBUG) { System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString()); } + GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); - if (read.isEmpty()) - continue; - - if (read.getSoftEnd() > ref.getWindow().getStop()) + if (!read.isEmpty() && (read.getSoftEnd() > refWindowStop && read.getSoftStart() < refWindowStop)) read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop()); - if (read.isEmpty()) - continue; - - if (read.getSoftStart() < ref.getWindow().getStart()) + if (!read.isEmpty() && (read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart)) read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart()); if (read.isEmpty()) From e93b025b393af24dabbaf336ed405482635645a6 Mon Sep 17 00:00:00 2001 From: Mauricio
Carneiro Date: Fri, 6 Jul 2012 12:07:57 -0400 Subject: [PATCH 02/14] Fixing unit test with the new clipping behavior for weird cigars, we no longer can assert the final number of bases in the unit test, so I'm taking this bit off the unit test. --- .../sting/utils/clipping/ReadClipperUnitTest.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index a819e41c7..22989b328 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -119,7 +119,6 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); if (!clipLeft.isEmpty()) { -// System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd())); Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } @@ -137,7 +136,7 @@ public class ReadClipperUnitTest extends BaseTest { if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); - if 
(!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } @@ -278,7 +277,6 @@ public class ReadClipperUnitTest extends BaseTest { private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) { assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone - assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped } /** @@ -294,12 +292,6 @@ public class ReadClipperUnitTest extends BaseTest { } } - private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) { - if (read.getCigarString().contains("M")) - Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); - } - - private boolean startsWithInsertion(Cigar cigar) { return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0; } From 125e6c1a475a25388cd2ad32a3f2397772673d65 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 6 Jul 2012 15:03:20 -0400 Subject: [PATCH 03/14] added BinaryTagCovariate for ancient dna analysis --- 
.../gatk/walkers/bqsr/BinaryTagCovariate.java | 61 +++++++++++++++++++ .../sting/gatk/walkers/bqsr/Covariate.java | 2 +- .../bqsr/RecalibrationArgumentCollection.java | 7 +++ .../walkers/bqsr/RecalibrationReport.java | 3 + 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java new file mode 100644 index 000000000..424d076b7 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java @@ -0,0 +1,61 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +/** + * Binary covariate allows BQSR to recalibrate based on a binary covariate in the BAM file. This covariate should assume values of 1 and 0. 
+ * + * @author Mauricio Carneiro + * @since 7/6/12 + */ +public class BinaryTagCovariate implements StandardCovariate { + + private String tag; + + @Override + public void initialize(RecalibrationArgumentCollection RAC) { + tag = RAC.BINARY_TAG_NAME; + } + + @Override + public void recordValues(GATKSAMRecord read, ReadCovariates values) { + final Object tagObject = read.getAttribute(tag); + + byte[] binaryTag; + if (tagObject instanceof byte[]) + binaryTag = (byte[]) tagObject; + else if (tagObject instanceof String) { + int readLength = ((String) tagObject).length(); + binaryTag = new byte[readLength]; + for (int i = 0; i Date: Sun, 8 Jul 2012 10:24:12 -0400 Subject: [PATCH 05/14] Update VariantRecalibrator error message to use -resource not old -B syntax --- .../walkers/variantrecalibration/VariantRecalibrator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 5bd7cccd2..d1eced5eb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -225,10 +225,10 @@ public class VariantRecalibrator extends RodWalker Date: Sun, 8 Jul 2012 10:29:13 -0400 Subject: [PATCH 06/14] Fixed nasty Rscript typo in VariantRecalibrator when compactPDF is available --- .../gatk/walkers/variantrecalibration/VariantRecalibrator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index d1eced5eb..244c5d109 100755 ---
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -463,7 +463,7 @@ public class VariantRecalibrator extends RodWalker Date: Sun, 8 Jul 2012 11:41:07 -0400 Subject: [PATCH 07/14] Bugfix for set key in mergeInfoWithMaxAC. -- Previous version was always setting set=source of info with highest AC. Should actually have been set to the set annotation value itself. --- .../sting/utils/variantcontext/VariantContextUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 843838972..d7e072980 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -644,7 +644,9 @@ public class VariantContextUtils { if ( setKey != null ) { attributes.put(setKey, setValue); - if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { attributesWithMaxAC.put(setKey, vcWithMaxAC.getSource()); } + if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { + attributesWithMaxAC.put(setKey, setValue); + } } } From 63f5262e456465068537992743934506f6b9d7d3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 8 Jul 2012 11:41:47 -0400 Subject: [PATCH 08/14] mergeInfoWithMaxAC is no longer hidden in CombineVariants --- .../sting/gatk/walkers/variantutils/CombineVariants.java | 1 - 1 file changed, 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 629c7f84c..94fa29173 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -166,7 +166,6 @@ public class CombineVariants extends RodWalker { @Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false) public boolean SUPPRESS_COMMAND_LINE_HEADER = false; - @Hidden @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false) public boolean MERGE_INFO_WITH_MAX_AC = false; From 5b0ade67c8b56b1c879f1a67344d880105da7854 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 8 Jul 2012 15:43:03 -0700 Subject: [PATCH 09/14] Updates to VCF processing for better BCF processing -- getMetaData now split into getMetaDataInSortedOrder() [old functionality] and getMetaDataInInputOrder() [according to the header order]. Important as BCF uses the order of elements in the header in the offsets to keys, and we were automatically sorting the BCF2 header which is out of order in samtools and the whole system was going crazy -- Updating GATK code to use the appropriate header function (this is why so many files have changed) -- BCF2 code was busted in not differentiating PASS from . from FILTER in VC (tests coming that will actually stress this) -- Bugfix for adding contig lines to BCF2 header dictionary -- VCFHeader metaData no longer sorted internally.
The system now maintains the data in header order, and only sorts output as requested in API -- VCFWriter and BCF2Writer now explicitly sort their header lines -- Don't allow filters to be added that are PASS in the contract --- .../walkers/diffengine/VCFDiffableReader.java | 5 +---- ...eSegregatingAlternateAllelesVCFWriter.java | 2 +- .../variantutils/FilterLiftedVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Codec.java | 10 +++++++--- .../sting/utils/codecs/bcf2/BCF2Utils.java | 9 +++------ .../utils/codecs/vcf/AbstractVCFCodec.java | 2 +- .../sting/utils/codecs/vcf/VCFHeader.java | 20 +++++++++++++------ .../codecs/vcf/VCFStandardHeaderLines.java | 5 ++--- .../sting/utils/codecs/vcf/VCFUtils.java | 9 +++------ .../variantcontext/VariantContextBuilder.java | 1 + .../variantcontext/writer/BCF2Writer.java | 5 ++++- .../variantcontext/writer/VCFWriter.java | 4 ++-- .../utils/codecs/vcf/VCFHeaderUnitTest.java | 5 ++--- .../utils/codecs/vcf/VCFIntegrationTest.java | 2 +- .../VariantContextTestProvider.java | 10 +++++----- .../writer/VCFWriterUnitTest.java | 5 +---- 18 files changed, 51 insertions(+), 49 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index e3f9b8ee3..5726eeed9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -27,15 +27,12 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.apache.log4j.Logger; import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.FeatureReader; -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.utils.Utils; import
org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; -import java.util.Arrays; import java.util.Iterator; import java.util.Map; @@ -69,7 +66,7 @@ public class VCFDiffableReader implements DiffableReader { FeatureReader reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), vcfCodec, false); VCFHeader header = (VCFHeader)reader.getHeader(); - for ( VCFHeaderLine headerLine : header.getMetaData() ) { + for ( VCFHeaderLine headerLine : header.getMetaDataInInputOrder() ) { String key = headerLine.getKey(); if ( headerLine instanceof VCFIDHeaderLine) key += "_" + ((VCFIDHeaderLine) headerLine).getID(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index 9a22e8cf6..5bbc6dacc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -102,7 +102,7 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter if (useSingleSample != null) { // only want to output context for one sample Set singSampSet = new TreeSet(); singSampSet.add(useSingleSample); - header = new VCFHeader(header.getMetaData(), singSampSet); + header = new VCFHeader(header.getMetaDataInSortedOrder(), singSampSet); } innerWriter.writeHeader(header); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 485f3394b..43816b0fa 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -63,7 +63,7 @@ public class FilterLiftedVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaData() : null, samples); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples); writer.writeHeader(vcfHeader); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 484400025..c1755aa00 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -92,7 +92,7 @@ public class LeftAlignVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - Set headerLines = vcfHeaders.get(trackName).getMetaData(); + Set headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder(); baseWriter.writeHeader(new VCFHeader(headerLines, samples)); writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, 200); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 21965afcd..60d41abd5 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -95,7 +95,7 @@ public class LiftoverVariants extends RodWalker { Set metaData = new HashSet(); if ( vcfHeaders.containsKey(trackName) ) - metaData.addAll(vcfHeaders.get(trackName).getMetaData()); + metaData.addAll(vcfHeaders.get(trackName).getMetaDataInSortedOrder()); if ( RECORD_ORIGINAL_LOCATION ) { metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record")); metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record")); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0503e417a..18b4d0b6c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -385,10 +385,14 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD if ( value == null ) builder.unfiltered(); else { - if ( value instanceof Integer ) + if ( value instanceof Integer ) { // fast path for single integer result - builder.filter(getDictionaryString((Integer)value)); - else { + final String filterString = getDictionaryString((Integer)value); + if ( VCFConstants.PASSES_FILTERS_v4.equals(filterString)) + builder.passFilters(); + else + builder.filter(filterString); + } else { for ( final int offset : (List)value ) builder.filter(getDictionaryString(offset)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 21deb4158..143ab52df 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.*; @@ -84,8 +81,8 @@ public final class BCF2Utils { boolean sawPASS = false; // set up the strings dictionary - for ( VCFHeaderLine line : header.getMetaData() ) { - if ( line instanceof VCFIDHeaderLine) { + for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { + if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) { final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; if ( ! 
seen.contains(idLine.getID())) { sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 5ad939e76..b3420514b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -115,7 +115,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec protected VCFHeader parseHeaderFromLines( final List headerStrings, final VCFHeaderVersion version ) { this.version = version; - Set metaData = new TreeSet(); + Set metaData = new LinkedHashSet(); Set sampleNames = new LinkedHashSet(); int contigCounter = 0; // iterate over all the passed in strings diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 296be7873..7a9329583 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -52,7 +52,7 @@ public class VCFHeader { } // the associated meta data - private final Set mMetaData = new TreeSet(); + private final Set mMetaData = new LinkedHashSet(); private final Map mInfoMetaData = new HashMap(); private final Map mFormatMetaData = new HashMap(); private final Map mFilterMetaData = new HashMap(); @@ -230,14 +230,22 @@ public class VCFHeader { } /** - * get the meta data, associated with this header + * get the meta data, associated with this header, in sorted order * * @return a set of the meta data */ - public Set getMetaData() { - Set lines = new LinkedHashSet(); + public Set getMetaDataInInputOrder() { + return makeGetMetaDataSet(mMetaData); + } + + public Set getMetaDataInSortedOrder() { + return makeGetMetaDataSet(new 
TreeSet(mMetaData)); + } + + private static Set makeGetMetaDataSet(final Set headerLinesInSomeOrder) { + final Set lines = new LinkedHashSet(); lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_1.getFormatString(), VCFHeaderVersion.VCF4_1.getVersionString())); - lines.addAll(mMetaData); + lines.addAll(headerLinesInSomeOrder); return Collections.unmodifiableSet(lines); } @@ -247,7 +255,7 @@ public class VCFHeader { * @return */ public VCFHeaderLine getMetaDataLine(final String key) { - for (final VCFHeaderLine line: getMetaData()) { + for (final VCFHeaderLine line: mMetaData) { if ( line.getKey().equals(key) ) return line; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java index 40d22f46f..b2e8cc100 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -60,8 +59,8 @@ public class VCFStandardHeaderLines { @Requires("header != null") @Ensures("result != null") public static VCFHeader repairStandardHeaderLines(final VCFHeader header) { - final Set newLines = new LinkedHashSet(header.getMetaData().size()); - for ( VCFHeaderLine line : header.getMetaData() ) { + final Set newLines = new LinkedHashSet(header.getMetaDataInInputOrder().size()); + for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { if ( line instanceof VCFFormatHeaderLine ) { line = formatStandards.repair((VCFFormatHeaderLine) line); } else if ( line instanceof VCFInfoHeaderLine) { diff --git 
a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index dc7bcd926..f80b0eae4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; @@ -34,7 +32,6 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; @@ -129,7 +126,7 @@ public class VCFUtils { if ( source.getRecordType().equals(VariantContext.class)) { VCFHeader header = (VCFHeader)source.getHeader(); if ( header != null ) - fields.addAll(header.getMetaData()); + fields.addAll(header.getMetaDataInSortedOrder()); } } @@ -160,7 +157,7 @@ public class VCFUtils { // todo -- needs to remove all version headers from sources and add its own VCF version line for ( VCFHeader source : headers ) { //System.out.printf("Merging in header %s%n", source); - for ( VCFHeaderLine line : source.getMetaData()) { + for ( VCFHeaderLine line : source.getMetaDataInSortedOrder()) { String key = line.getKey(); if ( line instanceof VCFIDHeaderLine ) @@ -250,7 +247,7 @@ public class VCFUtils { * @param refDict the SAM formatted reference sequence dictionary */ public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) { - return new 
VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaData(), referenceFile, refDict), oldHeader.getGenotypeSamples()); + return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples()); } public final static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index 0590329c4..f2375f6f9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -260,6 +260,7 @@ public class VariantContextBuilder { return this; } + @Requires({"filter != null", "!filter.equals(\"PASS\")"}) public VariantContextBuilder filter(final String filter) { if ( this.filters == null ) this.filters = new LinkedHashSet(1); this.filters.add(filter); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 7d9a18d14..45610bbf9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -109,7 +109,10 @@ class BCF2Writer extends IndexingVariantContextWriter { // -------------------------------------------------------------------------------- @Override - public void writeHeader(final VCFHeader header) { + public void writeHeader(VCFHeader header) { + // make sure the header is sorted correctly + header = new VCFHeader(header.getMetaDataInSortedOrder(), header.getGenotypeSamples()); + // create the config offsets map if ( header.getContigLines().isEmpty() ) { if ( 
ALLOW_MISSING_CONTIG_LINES ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 8ccb79744..4548e026e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -87,13 +87,13 @@ class VCFWriter extends IndexingVariantContextWriter { final boolean doNotWriteGenotypes, final String versionLine, final String streamNameForError) { - header = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header; + header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header; try { // the file format field needs to be written first writer.write(versionLine + "\n"); - for ( VCFHeaderLine line : header.getMetaData() ) { + for ( VCFHeaderLine line : header.getMetaDataInSortedOrder() ) { if ( VCFHeaderVersion.isFormatString(line.getKey()) ) continue; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java index b8d6f2d1d..62d584ef6 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; @@ -26,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest { private VCFHeader createHeader(String headerStr) { VCFCodec codec = new VCFCodec(); VCFHeader header = 
(VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr)))); - Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount); + Assert.assertEquals(header.getMetaDataInInputOrder().size(), VCF4headerStringCount); return header; } @@ -98,7 +97,7 @@ public class VCFHeaderUnitTest extends BaseTest { } catch (IOException e) { Assert.fail("Unable to make a temp file!"); } - for (VCFHeaderLine line : header.getMetaData()) + for (VCFHeaderLine line : header.getMetaDataInSortedOrder()) pw.println(line); pw.close(); Assert.assertEquals(md5SumFile(myTempFile), md5sum); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 1e3c799fe..b271d8c84 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -59,7 +59,7 @@ public class VCFIntegrationTest extends WalkerTest { executeTest("Test writing samtools WEx BCF example", spec1); } - @Test(enabled = false) // TODO disabled because current BCF2 is 1 based + @Test(enabled = false) public void testReadingSamtoolsWExBCFExample() { String testVCF = privateTestDir + "ex2.bcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 94ed2ce5f..ca4cdf306 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -106,7 +106,7 @@ public class VariantContextTestProvider { for ( final VariantContext vc : vcs ) if ( vc.hasGenotypes() 
) samples.addAll(vc.getSampleNames()); - this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaData(), samples); + this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaDataInSortedOrder(), samples); this.vcs = vcs; } @@ -885,12 +885,12 @@ public class VariantContextTestProvider { } public static void assertEquals(final VCFHeader actual, final VCFHeader expected) { - Assert.assertEquals(actual.getMetaData().size(), expected.getMetaData().size(), "No VCF header lines"); + Assert.assertEquals(actual.getMetaDataInSortedOrder().size(), expected.getMetaDataInSortedOrder().size(), "No VCF header lines"); // for some reason set.equals() is returning false but all paired elements are .equals(). Perhaps compare to is busted? - //Assert.assertEquals(actual.getMetaData(), expected.getMetaData()); - final List actualLines = new ArrayList(actual.getMetaData()); - final List expectedLines = new ArrayList(expected.getMetaData()); + //Assert.assertEquals(actual.getMetaDataInInputOrder(), expected.getMetaDataInInputOrder()); + final List actualLines = new ArrayList(actual.getMetaDataInSortedOrder()); + final List expectedLines = new ArrayList(expected.getMetaDataInSortedOrder()); for ( int i = 0; i < actualLines.size(); i++ ) { Assert.assertEquals(actualLines.get(i), expectedLines.get(i), "VCF header lines"); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java index d4e489420..a7fff4559 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java @@ -39,9 +39,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import 
org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; @@ -166,7 +163,7 @@ public class VCFWriterUnitTest extends BaseTest { Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index], field); index++; } - Assert.assertEquals(header.getMetaData().size(), metaData.size()); + Assert.assertEquals(header.getMetaDataInSortedOrder().size(), metaData.size()); index = 0; for (String key : header.getGenotypeSamples()) { Assert.assertTrue(additionalColumns.contains(key)); From dd0c47ab7e60abf38117e232d1c2ec8706652ed4 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 9 Jul 2012 10:23:26 -0400 Subject: [PATCH 10/14] Don't cast to a specific walker type since any walker can use the VA engine --- .../sting/gatk/walkers/annotator/MVLikelihoodRatio.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index e38d7d142..07fbfc3d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB; +import org.broadinstitute.sting.gatk.walkers.Walker; import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; @@ -33,7 +34,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( mendelianViolation == null ) { - if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) { + if (checkAndSetSamples(((Walker) walker).getSampleDB())) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); } else { From 6c17c50fa27939ac93798cd477b240cb3894608b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 9 Jul 2012 13:46:31 -0400 Subject: [PATCH 11/14] Updates to ReduceReads * Added optional parameter to not hard clip on the interval border * Made not clipping the default behavior (hence integration tests changed) * Updated integration tests. 
--- .../reducereads/ReduceReadsWalker.java | 17 +++++++++++++++-- .../reducereads/ReduceReadsIntegrationTest.java | 12 ++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java index 5b4a660e8..51993e1f2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java @@ -154,6 +154,13 @@ public class ReduceReadsWalker extends ReadWalker, Red @Argument(fullName = "dont_compress_read_names", shortName = "nocmp_names", doc = "", required = false) protected boolean DONT_COMPRESS_READ_NAMES = false; + /** + * Optionally hard clip all incoming reads to the desired intervals. The hard clips will happen exactly at the interval + * border. + */ + @Argument(fullName = "hard_clip_to_interval", shortName = "clip_int", doc = "", required = false) + protected boolean HARD_CLIP_TO_INTERVAL = false; + /** * Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be * considered consensus. @@ -260,8 +267,14 @@ public class ReduceReadsWalker extends ReadWalker, Red read = ReadClipper.hardClipAdaptorSequence(read); // Strip away adaptor sequences, if any. 
if (!DONT_CLIP_LOW_QUAL_TAILS) read = ReadClipper.hardClipLowQualEnds(read, minTailQuality); // Clip low quality tails - if (!isWholeGenome()) - mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval + if (!isWholeGenome()) { + if (HARD_CLIP_TO_INTERVAL) + mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval + else { + mappedReads = new LinkedList(); + mappedReads.add(read); + } + } else { mappedReads = new LinkedList(); if (!read.isEmpty()) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index f65d383e3..0a62afdc0 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -21,28 +21,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testDefaultCompression() { - RRTest("testDefaultCompression ", L, "4c92d59d4a5292af1f968dc922c2c63e"); + RRTest("testDefaultCompression ", L, "887d05e3bdbe831a2584305088806a39"); } @Test(enabled = true) public void testMultipleIntervals() { String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; - RRTest("testMultipleIntervals ", intervals, "97d5c3fda5551741676793ba325ec7ed"); + RRTest("testMultipleIntervals ", intervals, "04208b8e2c2e65bcb9d0eb038702b006"); } @Test(enabled = true) public void testHighCompression() { - RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e6bc1cd0e9de961cf0fb1789bf6ab108"); + 
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047"); } @Test(enabled = true) public void testLowCompression() { - RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "f33ec7cd0b98eebd73d1025ca656cd7e"); + RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "afd39459c841b68a442abdd5ef5f8f27"); } @Test(enabled = true) public void testIndelCompression() { - RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "e6e2bc889e4f342a7fedc5d38b391d20"); + RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973"); } @Test(enabled = true) @@ -62,7 +62,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testAddingReadAfterTailingTheStash() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; - executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("022931f032a4122cfe41e58e74d0aede"))); + executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6"))); } } From 7eb45b40380879ec78a1d33f351e4ff09a27edcd Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 9 Jul 2012 13:55:12 -0400 Subject: [PATCH 12/14] Fixed BQSR IntegrationTests * BinaryTag covariate is Experimental, not Standard (this was breaking integration tests) * New parameter in the Recalibration report requires new MD5 for one of the integration tests. 
--- .../sting/gatk/walkers/bqsr/BinaryTagCovariate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java index 424d076b7..a89586c2c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java @@ -9,7 +9,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * @author Mauricio Carneiro * @since 7/6/12 */ -public class BinaryTagCovariate implements StandardCovariate { +public class BinaryTagCovariate implements ExperimentalCovariate { private String tag; From d7bf74fb7eed48f018cd2c45b88eb69ab694ddad Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 10 Jul 2012 02:04:26 -0400 Subject: [PATCH 13/14] Updating default value for -mindel to the one used by Khalid in the pipeline and me in my tests. --- .../walkers/compression/reducereads/ReduceReadsWalker.java | 2 +- .../compression/reducereads/ReduceReadsIntegrationTest.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java index 51993e1f2..095149bae 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java @@ -173,7 +173,7 @@ public class ReduceReadsWalker extends ReadWalker, Red * considered consensus. 
*/ @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) - protected double minIndelProportionToTriggerVariant = 0.01; + protected double minIndelProportionToTriggerVariant = 0.05; /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 0a62afdc0..08f7ddd37 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -21,13 +21,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testDefaultCompression() { - RRTest("testDefaultCompression ", L, "887d05e3bdbe831a2584305088806a39"); + RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f"); } @Test(enabled = true) public void testMultipleIntervals() { String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; - RRTest("testMultipleIntervals ", intervals, "04208b8e2c2e65bcb9d0eb038702b006"); + RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76"); } @Test(enabled = true) From 279dff9f81bfdf996159103fad653d126916f350 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 10 Jul 2012 13:59:00 -0400 Subject: [PATCH 14/14] Bug fix when specifying a JEXL expression for a field that doesn't exist: we should treat the whole expression as false, but we were rethrowing the JEXL exception in this case. 
Added integration test to cover this in SelectVariants --- .../utils/variantcontext/VariantJEXLContext.java | 7 ++++++- .../variantutils/SelectVariantsIntegrationTest.java | 13 +++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index e25599812..913615a84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -268,7 +268,12 @@ class JEXLMap implements Map { // treat errors as no match jexl.put(exp, value == null ? false : value); } catch (Exception e) { - throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage())); + // if exception happens because variable is undefined (i.e. 
field in expression is not present), evaluate to FALSE + // todo - might be safer if we explicitly checked for an exception type, but Apache's API doesn't seem to have that ability + if (e.getMessage().contains("undefined variable")) + jexl.put(exp,false); + else + throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage())); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 59eaa177d..e25d65465 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -70,6 +70,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testComplexSelection--" + testfile, spec); } + @Test + public void testNonExistingFieldSelection() { + String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -env -ef -select 'foo!=0||DP>0' --variant " + testfile), + 1, + Arrays.asList("44e77cea624cfff2b8acc3a4b30485cb") // should yield empty vcf because the foo!=0 will yield complete expression false + ); + spec.disableShadowBCF(); + executeTest("testNonExistingSelection--" + testfile, spec); + } + @Test public void testSampleExclusion() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";