diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java index 5b4a660e8..095149bae 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsWalker.java @@ -154,6 +154,13 @@ public class ReduceReadsWalker extends ReadWalker, Red @Argument(fullName = "dont_compress_read_names", shortName = "nocmp_names", doc = "", required = false) protected boolean DONT_COMPRESS_READ_NAMES = false; + /** + * Optionally hard clip all incoming reads to the desired intervals. The hard clips will happen exactly at the interval + * border. + */ + @Argument(fullName = "hard_clip_to_interval", shortName = "clip_int", doc = "", required = false) + protected boolean HARD_CLIP_TO_INTERVAL = false; + /** * Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be * considered consensus. @@ -166,7 +173,7 @@ public class ReduceReadsWalker extends ReadWalker, Red * considered consensus. */ @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false) - protected double minIndelProportionToTriggerVariant = 0.01; + protected double minIndelProportionToTriggerVariant = 0.05; /** * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this). @@ -260,8 +267,14 @@ public class ReduceReadsWalker extends ReadWalker, Red read = ReadClipper.hardClipAdaptorSequence(read); // Strip away adaptor sequences, if any. if (!DONT_CLIP_LOW_QUAL_TAILS) read = ReadClipper.hardClipLowQualEnds(read, minTailQuality); // Clip low quality tails - if (!isWholeGenome()) - mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval + if (!isWholeGenome()) { + if (HARD_CLIP_TO_INTERVAL) + mappedReads = hardClipReadToInterval(read); // Hard clip the remainder of the read to the desired interval + else { + mappedReads = new LinkedList(); + mappedReads.add(read); + } + } else { mappedReads = new LinkedList(); if (!read.isEmpty()) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index f65d383e3..08f7ddd37 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -21,28 +21,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testDefaultCompression() { - RRTest("testDefaultCompression ", L, "4c92d59d4a5292af1f968dc922c2c63e"); + RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f"); } @Test(enabled = true) public void testMultipleIntervals() { String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; - RRTest("testMultipleIntervals ", intervals, "97d5c3fda5551741676793ba325ec7ed"); + RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76"); } @Test(enabled = true) public void testHighCompression() { - RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e6bc1cd0e9de961cf0fb1789bf6ab108"); + RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047"); } @Test(enabled = true) public void testLowCompression() { - RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "f33ec7cd0b98eebd73d1025ca656cd7e"); + RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "afd39459c841b68a442abdd5ef5f8f27"); } @Test(enabled = true) public void testIndelCompression() { - RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "e6e2bc889e4f342a7fedc5d38b391d20"); + RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973"); } @Test(enabled = true) @@ -62,7 +62,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testAddingReadAfterTailingTheStash() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; - executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("022931f032a4122cfe41e58e74d0aede"))); + executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6"))); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index e38d7d142..07fbfc3d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; @@ -33,7 +34,7 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( mendelianViolation == null ) { - if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) { + if (checkAndSetSamples(((Walker) walker).getSampleDB())) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java new file mode 100644 index 000000000..a89586c2c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BinaryTagCovariate.java @@ -0,0 +1,61 @@ +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +/** + * Binary covariate allows BQSR to recalibrate based on a binary covariate in the BAM file. This covariate should assume values of 1 and 0. + * + * @author Mauricio Carneiro + * @since 7/6/12 + */ +public class BinaryTagCovariate implements ExperimentalCovariate { + + private String tag; + + @Override + public void initialize(RecalibrationArgumentCollection RAC) { + tag = RAC.BINARY_TAG_NAME; + } + + @Override + public void recordValues(GATKSAMRecord read, ReadCovariates values) { + final Object tagObject = read.getAttribute(tag); + + byte[] binaryTag; + if (tagObject instanceof byte[]) + binaryTag = (byte[]) tagObject; + else if (tagObject instanceof String) { + int readLength = ((String) tagObject).length(); + binaryTag = new byte[readLength]; + for (int i = 0; i reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), vcfCodec, false); VCFHeader header = (VCFHeader)reader.getHeader(); - for ( VCFHeaderLine headerLine : header.getMetaData() ) { + for ( VCFHeaderLine headerLine : header.getMetaDataInInputOrder() ) { String key = headerLine.getKey(); if ( headerLine instanceof VCFIDHeaderLine) key += "_" + ((VCFIDHeaderLine) headerLine).getID(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 3ac09d2a7..f49e78469 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -198,21 +198,19 @@ public class PairHMMIndelErrorModel { } } else { + final int refWindowStart = ref.getWindow().getStart(); + final int refWindowStop = ref.getWindow().getStop(); + if (DEBUG) { System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString()); } + GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); - if (read.isEmpty()) - continue; - - if (read.getSoftEnd() > ref.getWindow().getStop()) + if (!read.isEmpty() && (read.getSoftEnd() > refWindowStop && read.getSoftStart() < refWindowStop)) read = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, ref.getWindow().getStop()); - if (read.isEmpty()) - continue; - - if (read.getSoftStart() < ref.getWindow().getStart()) + if (!read.isEmpty() && (read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart)) read = ReadClipper.hardClipByReferenceCoordinatesLeftTail (read, ref.getWindow().getStart()); if (read.isEmpty()) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index 9a22e8cf6..5bbc6dacc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -102,7 +102,7 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter if (useSingleSample != null) { // only want to output context for one sample Set singSampSet = new TreeSet(); singSampSet.add(useSingleSample); - header = new VCFHeader(header.getMetaData(), singSampSet); + header = new VCFHeader(header.getMetaDataInSortedOrder(), singSampSet); } innerWriter.writeHeader(header); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 5bd7cccd2..244c5d109 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -225,10 +225,10 @@ public class VariantRecalibrator extends RodWalker { @Argument(fullName="suppressCommandLineHeader", shortName="suppressCommandLineHeader", doc="If true, do not output the header containing the command line used", required=false) public boolean SUPPRESS_COMMAND_LINE_HEADER = false; - @Hidden @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false) public boolean MERGE_INFO_WITH_MAX_AC = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 485f3394b..43816b0fa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -63,7 +63,7 @@ public class FilterLiftedVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaData() : null, samples); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaDataInSortedOrder() : null, samples); writer.writeHeader(vcfHeader); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 484400025..c1755aa00 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -92,7 +92,7 @@ public class LeftAlignVariants extends RodWalker { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - Set headerLines = vcfHeaders.get(trackName).getMetaData(); + Set headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder(); baseWriter.writeHeader(new VCFHeader(headerLines, samples)); writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, 200); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 21965afcd..60d41abd5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -95,7 +95,7 @@ public class LiftoverVariants extends RodWalker { Set metaData = new HashSet(); if ( vcfHeaders.containsKey(trackName) ) - metaData.addAll(vcfHeaders.get(trackName).getMetaData()); + metaData.addAll(vcfHeaders.get(trackName).getMetaDataInSortedOrder()); if ( RECORD_ORIGINAL_LOCATION ) { metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record")); metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record")); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0503e417a..18b4d0b6c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -385,10 +385,14 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD if ( value == null ) builder.unfiltered(); else { - if ( value instanceof Integer ) + if ( value instanceof Integer ) { // fast path for single integer result - builder.filter(getDictionaryString((Integer)value)); - else { + final String filterString = getDictionaryString((Integer)value); + if ( VCFConstants.PASSES_FILTERS_v4.equals(filterString)) + builder.passFilters(); + else + builder.filter(filterString); + } else { for ( final int offset : (List)value ) builder.filter(getDictionaryString(offset)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 21deb4158..143ab52df 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.*; @@ -84,8 +81,8 @@ public final class BCF2Utils { boolean sawPASS = false; // set up the strings dictionary - for ( VCFHeaderLine line : header.getMetaData() ) { - if ( line instanceof VCFIDHeaderLine) { + for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { + if ( line instanceof VCFIDHeaderLine && ! (line instanceof VCFContigHeaderLine) ) { final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; if ( ! seen.contains(idLine.getID())) { sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 5ad939e76..b3420514b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -115,7 +115,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec protected VCFHeader parseHeaderFromLines( final List headerStrings, final VCFHeaderVersion version ) { this.version = version; - Set metaData = new TreeSet(); + Set metaData = new LinkedHashSet(); Set sampleNames = new LinkedHashSet(); int contigCounter = 0; // iterate over all the passed in strings diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 296be7873..7a9329583 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -52,7 +52,7 @@ public class VCFHeader { } // the associated meta data - private final Set mMetaData = new TreeSet(); + private final Set mMetaData = new LinkedHashSet(); private final Map mInfoMetaData = new HashMap(); private final Map mFormatMetaData = new HashMap(); private final Map mFilterMetaData = new HashMap(); @@ -230,14 +230,22 @@ public class VCFHeader { } /** - * get the meta data, associated with this header + * get the meta data, associated with this header, in sorted order * * @return a set of the meta data */ - public Set getMetaData() { - Set lines = new LinkedHashSet(); + public Set getMetaDataInInputOrder() { + return makeGetMetaDataSet(mMetaData); + } + + public Set getMetaDataInSortedOrder() { + return makeGetMetaDataSet(new TreeSet(mMetaData)); + } + + private static Set makeGetMetaDataSet(final Set headerLinesInSomeOrder) { + final Set lines = new LinkedHashSet(); lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_1.getFormatString(), VCFHeaderVersion.VCF4_1.getVersionString())); - lines.addAll(mMetaData); + lines.addAll(headerLinesInSomeOrder); return Collections.unmodifiableSet(lines); } @@ -247,7 +255,7 @@ public class VCFHeader { * @return */ public VCFHeaderLine getMetaDataLine(final String key) { - for (final VCFHeaderLine line: getMetaData()) { + for (final VCFHeaderLine line: mMetaData) { if ( line.getKey().equals(key) ) return line; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java index 40d22f46f..b2e8cc100 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; import com.google.java.contract.Ensures; -import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -60,8 +59,8 @@ public class VCFStandardHeaderLines { @Requires("header != null") @Ensures("result != null") public static VCFHeader repairStandardHeaderLines(final VCFHeader header) { - final Set newLines = new LinkedHashSet(header.getMetaData().size()); - for ( VCFHeaderLine line : header.getMetaData() ) { + final Set newLines = new LinkedHashSet(header.getMetaDataInInputOrder().size()); + for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { if ( line instanceof VCFFormatHeaderLine ) { line = formatStandards.repair((VCFFormatHeaderLine) line); } else if ( line instanceof VCFInfoHeaderLine) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index dc7bcd926..f80b0eae4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; @@ -34,7 +32,6 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; @@ -129,7 +126,7 @@ public class VCFUtils { if ( source.getRecordType().equals(VariantContext.class)) { VCFHeader header = (VCFHeader)source.getHeader(); if ( header != null ) - fields.addAll(header.getMetaData()); + fields.addAll(header.getMetaDataInSortedOrder()); } } @@ -160,7 +157,7 @@ public class VCFUtils { // todo -- needs to remove all version headers from sources and add its own VCF version line for ( VCFHeader source : headers ) { //System.out.printf("Merging in header %s%n", source); - for ( VCFHeaderLine line : source.getMetaData()) { + for ( VCFHeaderLine line : source.getMetaDataInSortedOrder()) { String key = line.getKey(); if ( line instanceof VCFIDHeaderLine ) @@ -250,7 +247,7 @@ public class VCFUtils { * @param refDict the SAM formatted reference sequence dictionary */ public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) { - return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaData(), referenceFile, refDict), oldHeader.getGenotypeSamples()); + return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples()); } public final static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index 0590329c4..f2375f6f9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -260,6 +260,7 @@ public class VariantContextBuilder { return this; } + @Requires({"filter != null", "!filter.equals(\"PASS\")"}) public VariantContextBuilder filter(final String filter) { if ( this.filters == null ) this.filters = new LinkedHashSet(1); this.filters.add(filter); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 843838972..d7e072980 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -644,7 +644,9 @@ public class VariantContextUtils { if ( setKey != null ) { attributes.put(setKey, setValue); - if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { attributesWithMaxAC.put(setKey, vcWithMaxAC.getSource()); } + if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { + attributesWithMaxAC.put(setKey, setValue); + } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index e25599812..913615a84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -268,7 +268,12 @@ class JEXLMap implements Map { // treat errors as no match jexl.put(exp, value == null ? false : value); } catch (Exception e) { - throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage())); + // if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE + // todo - might be safer if we explicitly checked for an exception type, but Apache's API doesn't seem to have that ability + if (e.getMessage().contains("undefined variable")) + jexl.put(exp,false); + else + throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage())); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 7d9a18d14..45610bbf9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -109,7 +109,10 @@ class BCF2Writer extends IndexingVariantContextWriter { // -------------------------------------------------------------------------------- @Override - public void writeHeader(final VCFHeader header) { + public void writeHeader(VCFHeader header) { + // make sure the header is sorted correctly + header = new VCFHeader(header.getMetaDataInSortedOrder(), header.getGenotypeSamples()); + // create the config offsets map if ( header.getContigLines().isEmpty() ) { if ( ALLOW_MISSING_CONTIG_LINES ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 8ccb79744..4548e026e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -87,13 +87,13 @@ class VCFWriter extends IndexingVariantContextWriter { final boolean doNotWriteGenotypes, final String versionLine, final String streamNameForError) { - header = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header; + header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : header; try { // the file format field needs to be written first writer.write(versionLine + "\n"); - for ( VCFHeaderLine line : header.getMetaData() ) { + for ( VCFHeaderLine line : header.getMetaDataInSortedOrder() ) { if ( VCFHeaderVersion.isFormatString(line.getKey()) ) continue; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 59eaa177d..e25d65465 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -70,6 +70,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testComplexSelection--" + testfile, spec); } + @Test + public void testNonExistingFieldSelection() { + String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -env -ef -select 'foo!=0||DP>0' --variant " + testfile), + 1, + Arrays.asList("44e77cea624cfff2b8acc3a4b30485cb") // should yield empty vcf because the foo!=0 will yield complete expression false + ); + spec.disableShadowBCF(); + executeTest("testNonExistingSelection--" + testfile, spec); + } + @Test public void testSampleExclusion() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; diff --git a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index a819e41c7..22989b328 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -119,7 +119,6 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); if (!clipLeft.isEmpty()) { -// System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd())); Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } @@ -137,7 +136,7 @@ public class ReadClipperUnitTest extends BaseTest { if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); - if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } @@ -278,7 +277,6 @@ public class ReadClipperUnitTest extends BaseTest { private void checkClippedReadsForLowQualEnds(GATKSAMRecord read, GATKSAMRecord clippedRead, byte lowQual, int nLowQualBases) { assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed assertNoLowQualBases(clippedRead, lowQual); // Make sure the low qualities are gone - assertNumberOfBases(read, clippedRead, nLowQualBases); // Make sure only low quality bases were clipped } /** @@ -294,12 +292,6 @@ public class ReadClipperUnitTest extends BaseTest { } } - private void assertNumberOfBases(GATKSAMRecord read, GATKSAMRecord clipLeft, int nLowQualBases) { - if (read.getCigarString().contains("M")) - Assert.assertEquals(clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), read.getReadLength() - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); - } - - private boolean startsWithInsertion(Cigar cigar) { return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0; } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java index b8d6f2d1d..62d584ef6 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.codecs.vcf; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; @@ -26,7 +25,7 @@ public class VCFHeaderUnitTest extends BaseTest { private VCFHeader createHeader(String headerStr) { VCFCodec codec = new VCFCodec(); VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(headerStr)))); - Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount); + Assert.assertEquals(header.getMetaDataInInputOrder().size(), VCF4headerStringCount); return header; } @@ -98,7 +97,7 @@ public class VCFHeaderUnitTest extends BaseTest { } catch (IOException e) { Assert.fail("Unable to make a temp file!"); } - for (VCFHeaderLine line : header.getMetaData()) + for (VCFHeaderLine line : header.getMetaDataInSortedOrder()) pw.println(line); pw.close(); Assert.assertEquals(md5SumFile(myTempFile), md5sum); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 1e3c799fe..b271d8c84 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -59,7 +59,7 @@ public class VCFIntegrationTest extends WalkerTest { executeTest("Test writing samtools WEx BCF example", spec1); } - @Test(enabled = false) // TODO disabled because current BCF2 is 1 based + @Test(enabled = false) public void testReadingSamtoolsWExBCFExample() { String testVCF = privateTestDir + "ex2.bcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 94ed2ce5f..ca4cdf306 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -106,7 +106,7 @@ public class VariantContextTestProvider { for ( final VariantContext vc : vcs ) if ( vc.hasGenotypes() ) samples.addAll(vc.getSampleNames()); - this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaData(), samples); + this.header = samples.isEmpty() ? header : new VCFHeader(header.getMetaDataInSortedOrder(), samples); this.vcs = vcs; } @@ -885,12 +885,12 @@ public class VariantContextTestProvider { } public static void assertEquals(final VCFHeader actual, final VCFHeader expected) { - Assert.assertEquals(actual.getMetaData().size(), expected.getMetaData().size(), "No VCF header lines"); + Assert.assertEquals(actual.getMetaDataInSortedOrder().size(), expected.getMetaDataInSortedOrder().size(), "No VCF header lines"); // for some reason set.equals() is returning false but all paired elements are .equals(). Perhaps compare to is busted? - //Assert.assertEquals(actual.getMetaData(), expected.getMetaData()); - final List actualLines = new ArrayList(actual.getMetaData()); - final List expectedLines = new ArrayList(expected.getMetaData()); + //Assert.assertEquals(actual.getMetaDataInInputOrder(), expected.getMetaDataInInputOrder()); + final List actualLines = new ArrayList(actual.getMetaDataInSortedOrder()); + final List expectedLines = new ArrayList(expected.getMetaDataInSortedOrder()); for ( int i = 0; i < actualLines.size(); i++ ) { Assert.assertEquals(actualLines.get(i), expectedLines.get(i), "VCF header lines"); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java index d4e489420..a7fff4559 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java @@ -39,9 +39,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; @@ -166,7 +163,7 @@ public class VCFWriterUnitTest extends BaseTest { Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index], field); index++; } - Assert.assertEquals(header.getMetaData().size(), metaData.size()); + Assert.assertEquals(header.getMetaDataInSortedOrder().size(), metaData.size()); index = 0; for (String key : header.getGenotypeSamples()) { Assert.assertTrue(additionalColumns.contains(key));