From c3ea96d85616a3ce0b4b90d40e2d141b1efd16cf Mon Sep 17 00:00:00 2001
From: Mark DePristo
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents: @@ -93,7 +96,7 @@ import java.util.*; *
* java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ - * -T VariantEval \ + * -T DepthOfCoverage \ * -o file_name_base \ * -I input_bams.list * [-geneList refSeq.sorted.txt] \ diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index d94d9ff84..f142fa5aa 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -12,19 +12,35 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.ArrayList; /** - * TODO FOR CHRIS HARTL + * Allows for reading in RefSeq information * *- * Codec Description + * Parses a sorted UCSC RefSeq file (see below) into relevant features: the gene name, the unique gene name (if multiple transcrips get separate entries), exons, gene start/stop, coding start/stop, + * strandedness of transcription. *
* *- * See also: link to file specification + * Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here + * http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq *
+ *Usage
+ * The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example + *+ * -refSeqBinding:REFSEQ /path/to/refSeq.txt + *+ * + * You will need to consult individual walkers for the binding name ("refSeqBinding", above) * *File format example
+ * If you want to define your own file for use, the format is (tab delimited): + * bin, name, chrom, strand, transcription start, transcription end, coding start, coding end, num exons, exon starts, exon ends, id, alt. name, coding start status (complete/incomplete), coding end status (complete,incomplete) + * and exon frames, for example: + *+ * 76 NM_001011874 1 - 3204562 3661579 3206102 3661429 3 3204562,3411782,3660632, 3207049,3411982,3661579, 0 Xkr4 cmpl cmpl 1,2,0, + *+ * for more information see here *- * A BAM file containing exactly one sample. + * *
* * @author Mark DePristo From 5e832254a4e024378f7fdee252abf7df9e289c6a Mon Sep 17 00:00:00 2001 From: Mauricio CarneiroDate: Mon, 19 Sep 2011 13:28:41 -0400 Subject: [PATCH 16/41] Fixing ReadAndInterval overlap comments. --- .../sting/utils/sam/ReadUtils.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 62bbb0307..18fcdabf2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -118,31 +118,40 @@ public class ReadUtils { /** * This enum represents all the different ways in which a read can overlap an interval. * - * NO_OVERLAP: + * NO_OVERLAP_CONTIG: + * read and interval are in different contigs. + * + * NO_OVERLAP_LEFT: + * the read does not overlap the interval. + * + * |----------------| (interval) + * <----------------> (read) + * + * NO_OVERLAP_RIGHT: * the read does not overlap the interval. * * |----------------| (interval) * <----------------> (read) * - * LEFT_OVERLAP: + * OVERLAP_LEFT: * the read starts before the beginning of the interval but ends inside of it * * |----------------| (interval) * <----------------> (read) * - * RIGHT_OVERLAP: + * OVERLAP_RIGHT: * the read starts inside the interval but ends outside of it * * |----------------| (interval) * <----------------> (read) * - * FULL_OVERLAP: + * OVERLAP_LEFT_AND_RIGHT: * the read starts before the interval and ends after the interval * * |-----------| (interval) * <-------------------> (read) * - * CONTAINED: + * OVERLAP_CONTAINED: * the read starts and ends inside the interval * * |----------------| (interval) From ba150570f3d7747256f634a2828ab673a98953f7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Sep 2011 13:30:32 -0400 Subject: [PATCH 17/41] Updating to use new rod system syntax plus name change for CountRODs --- .../sting/queue/qscripts/GATKResourcesBundle.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index e8b8258c1..036a77b58 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -300,9 +300,9 @@ class GATKResourcesBundle extends QScript { bamFile = bamIn } - class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRod with UNIVERSAL_GATK_ARGS { + class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRODs with UNIVERSAL_GATK_ARGS { //@Output val vcfIndex: File = swapExt(vcf.getParent, vcf, ".vcf", ".vcf.idx") - this.rodBind :+= RodBind(vcf.getName, "VCF", vcf) + this.rod :+= vcf this.reference_sequence = ref } @@ -313,7 +313,7 @@ class GATKResourcesBundle extends QScript { } class MakeDBSNP129(@Input dbsnp: File, @Input ref: File, @Output dbsnp129: File) extends SelectVariants with UNIVERSAL_GATK_ARGS { - this.rodBind :+= RodBind("variant", "VCF", dbsnp) + this.variant = dbsnp this.select ++= List("\"dbSNPBuildID <= 129\"") this.reference_sequence = ref this.out = dbsnp129 From 080c9575470c505e10f7b09d59fa22fcb668867d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Sep 2011 13:53:08 -0400 Subject: [PATCH 18/41] Fixing contracts for SoftUnclippedEnd utils Now accepts reads that are entirely contained inside an insertion. --- .../broadinstitute/sting/utils/sam/ReadUtils.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 18fcdabf2..2de17db14 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -667,7 +667,7 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } - @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) + @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedStart(SAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { @@ -679,7 +679,7 @@ public class ReadUtils { return start; } - @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) + @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { int stop = read.getUnclippedStart(); int shift = 0; @@ -695,6 +695,14 @@ public class ReadUtils { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + private static boolean readIsEntirelyInsertion(SAMRecord read) { + for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + if (cigarElement.getOperator() != CigarOperator.INSERTION) + return false; + } + return true; + } + /** * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before * the alignment start of the read. From 56106d54ed620965aea0b39052de43c81671c817 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Sep 2011 14:00:00 -0400 Subject: [PATCH 19/41] Changing ReadUtils behavior to comply with GenomeLocParser Now the functions getRefCoordSoftUnclippedStart and getRefCoordSoftUnclippedEnd will return getUnclippedStart if the read is all contained within an insertion. Updated the contracts accordingly. This should give the same behavior as the GenomeLocParser now. --- .../src/org/broadinstitute/sting/utils/sam/ReadUtils.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 2de17db14..5d3ef3086 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -667,7 +667,7 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } - @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedStart(SAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { @@ -679,9 +679,13 @@ public class ReadUtils { return start; } - @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { int stop = read.getUnclippedStart(); + + if (readIsEntirelyInsertion(read)) + return stop; + int shift = 0; CigarOperator lastOperator = null; for (CigarElement cigarElement : read.getCigar().getCigarElements()) { From 61b89e236ab13b073a3572e983b6c730efd5331e Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Tue, 20 Sep 2011 00:14:35 -0400 Subject: [PATCH 20/41] To work around potential problem with invalid javax.mail 1.4.1 in ivy cache, added explicit javax.mail 1.4.4 along with build.xml code to remove 1.4.1. --- build.xml | 8 ++++++++ ivy.xml | 6 ++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/build.xml b/build.xml index e5ad9daf0..1f26e7b7a 100644 --- a/build.xml +++ b/build.xml @@ -163,6 +163,14 @@ + + + + diff --git a/ivy.xml b/ivy.xml index 115f4062a..f90b9a010 100644 --- a/ivy.xml +++ b/ivy.xml @@ -15,10 +15,8 @@ - - - +- + From 5d0705acd654cfed6e9bf2ba690a0c75ee5a50d8 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 20 Sep 2011 09:07:28 -0400 Subject: [PATCH 21/41] Adding quality scores to the VCF records created by the Haplotype Caller --- .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 1 - 1 file changed, 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index a5cb00a5d..87dd37bf6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -40,7 +40,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; From b7511c5ff3b36e16037bfbbbd17b9fd4c9ea47af Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 20 Sep 2011 10:53:18 -0400 Subject: [PATCH 22/41] Fixed long-standing bug in tribble index creation -- Previously, on the fly indices didn't have dictionary set on the fly, so the GATK would read, add dictionary, and rewrite the index. This is now fixed, so that the on the fly index contains the reference dictionary when first written, avoiding the unnecessary read and write -- Added a GenomeAnalysisEngine and Walker function called getMasterSequenceDictionary() that fetches the reference sequence dictionary. This can be used conveniently everywhere, and is what's written into the Tribble index -- Refactored tribble index utilities from RMDTrackBuilder into IndexDictionaryUtils -- VCFWriter now requires the master sequence dictionary -- Updated walkers that create VCFWriters to provide the master sequence dictionary --- .../sting/gatk/GenomeAnalysisEngine.java | 8 ++ .../gatk/io/storage/VCFWriterStorage.java | 4 +- .../sting/gatk/io/stubs/VCFWriterStub.java | 10 ++ .../gatk/refdata/indexer/RMDIndexer.java | 2 +- .../refdata/tracks/IndexDictionaryUtils.java | 106 ++++++++++++++++ .../gatk/refdata/tracks/RMDTrackBuilder.java | 113 ++++-------------- .../sting/gatk/walkers/Walker.java | 10 ++ .../variantutils/LiftoverVariants.java | 2 +- .../variantutils/RandomlySplitVariants.java | 2 +- .../utils/codecs/vcf/IndexingVCFWriter.java | 38 ++++-- .../utils/codecs/vcf/StandardVCFWriter.java | 26 ++-- .../sting/utils/gcf/GCFWriter.java | 5 +- .../org/broadinstitute/sting/WalkerTest.java | 2 +- .../tracks/RMDTrackBuilderUnitTest.java | 4 +- .../codecs/vcf/IndexFactoryUnitTest.java | 22 +++- .../utils/genotype/vcf/VCFWriterUnitTest.java | 5 +- 16 files changed, 228 insertions(+), 131 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 5b9ebd99b..972943e26 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -929,6 +929,14 @@ public class GenomeAnalysisEngine { return readsDataSource.getHeader(reader); } + /** + * Gets the master sequence dictionary for this GATK engine instance + * @return a never-null dictionary listing all of the contigs known to this engine instance + */ + public SAMSequenceDictionary getMasterSequenceDictionary() { + return getReferenceDataSource().getReference().getSequenceDictionary(); + } + /** * Returns data source object encapsulating all essential info and handlers used to traverse * reads; header merger, individual file readers etc can be accessed through the returned data source object. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index ebb4cbe66..4ca7b935f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage , VCFWriter { else if ( stub.getOutputStream() != null ) { this.file = null; this.stream = stub.getOutputStream(); - writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes()); + writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes()); } else throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); @@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage , VCFWriter { } // The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it. - return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes()); + return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes()); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 936243f9d..82cb43634 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub , VCFWriter { return isCompressed; } + /** + * Gets the master sequence dictionary from the engine associated with this stub + * @link GenomeAnalysisEngine.getMasterSequenceDictionary + * @return + */ + public SAMSequenceDictionary getMasterSequenceDictionary() { + return engine.getMasterSequenceDictionary(); + } + /** * Should we tell the VCF writer not to write genotypes? * @return true if the writer should not write genotypes. diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 029800aea..9e5a95d10 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -101,7 +101,7 @@ public class RMDIndexer extends CommandLineProgram { Index index = IndexFactory.createIndex(inputFileSource, codec, approach); // add writing of the sequence dictionary, if supplied - builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false); + builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary()); // create the output stream, and write the index LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java new file mode 100644 index 000000000..d133439dc --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; +import org.apache.log4j.Logger; +import org.broad.tribble.index.Index; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.utils.SequenceDictionaryUtils; + +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +/** + * Utilities for working with Sequence Dictionaries embedded in tribble indices + * + * @author Your Name + * @since Date created + */ +public class IndexDictionaryUtils { + private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class); + + // a constant we use for marking sequence dictionary entries in the Tribble index property list + public static final String SequenceDictionaryPropertyPredicate = "DICT:"; + + /** + * get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index + * @param index the index file to use + * @return a SAMSequenceDictionary if available, null if unavailable + */ + public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) { + SAMSequenceDictionary dict = new SAMSequenceDictionary(); + for (Map.Entry entry : index.getProperties().entrySet()) { + if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate)) + dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()), + Integer.valueOf(entry.getValue()))); + } + return dict; + } + + /** + * create the sequence dictionary with the contig list; a backup approach + * @param index the index file to use + * @param dict the sequence dictionary to add contigs to + * @return the filled-in sequence dictionary + */ + static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) { + LinkedHashSet seqNames = index.getSequenceNames(); + if (seqNames == null) { + return dict; + } + for (String name : seqNames) { + SAMSequenceRecord seq = new SAMSequenceRecord(name, 0); + dict.addSequence(seq); + } + return dict; + } + + public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) { + for ( SAMSequenceRecord seq : dict.getSequences() ) { + final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName(); + final String length = String.valueOf(seq.getSequenceLength()); + index.addProperty(contig,length); + } + } + + public static void validateTrackSequenceDictionary(final String trackName, + final SAMSequenceDictionary trackDict, + final SAMSequenceDictionary referenceDict, + final ValidationExclusion.TYPE validationExclusionType ) { + // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation + if (trackDict == null || trackDict.size() == 0) + logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation"); + else { + Set trackSequences = new TreeSet (); + for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences()) + trackSequences.add(dictionaryEntry.getSequenceName()); + SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index 46eb79aa7..3b4558579 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; -import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureSource; @@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.SequenceDictionaryUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -52,16 +50,11 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.util.LinkedHashSet; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; - /** - * - * @author aaron + * + * @author aaron * ` * Class RMDTrackBuilder * @@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager { private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class); public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false; - // a constant we use for marking sequence dictionary entries in the Tribble index property list - public static final String SequenceDictionaryPropertyPredicate = "DICT:"; - // private sequence dictionary we use to set our tracks with private SAMSequenceDictionary dict = null; @@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager { try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } catch (ReviewedStingException e) { } - sequenceDictionary = getSequenceDictionaryFromProperties(index); + sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index); // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match if (sequenceDictionary.size() == 0 && dict != null) { File indexFile = Tribble.indexFile(inputFile); - setIndexSequenceDictionary(inputFile,index,dict,indexFile,true); - sequenceDictionary = getSequenceDictionaryFromProperties(index); + validateAndUpdateIndexSequenceDictionary(inputFile, index, dict); + try { // re-write the index + writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile)); + } catch (IOException e) { + logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK"); + } + + sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index); } if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE ) @@ -363,96 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager { // this can take a while, let them know what we're doing logger.info("Creating Tribble index in memory for file " + inputFile); Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); - setIndexSequenceDictionary(inputFile, idx, dict, null, false); + validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict); return idx; } - - // --------------------------------------------------------------------------------------------------------- - // static functions to work with the sequence dictionaries of indexes - // --------------------------------------------------------------------------------------------------------- - - /** - * get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index - * @param index the index file to use - * @return a SAMSequenceDictionary if available, null if unavailable - */ - public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) { - SAMSequenceDictionary dict = new SAMSequenceDictionary(); - for (Map.Entry entry : index.getProperties().entrySet()) { - if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate)) - dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()), - Integer.valueOf(entry.getValue()))); - } - return dict; - } - - /** - * create the sequence dictionary with the contig list; a backup approach - * @param index the index file to use - * @param dict the sequence dictionary to add contigs to - * @return the filled-in sequence dictionary - */ - private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) { - LinkedHashSet seqNames = index.getSequenceNames(); - if (seqNames == null) { - return dict; - } - for (String name : seqNames) { - SAMSequenceRecord seq = new SAMSequenceRecord(name, 0); - dict.addSequence(seq); - } - return dict; - } - /** * set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible. * (that each contig in the index is in the sequence dictionary). * @param inputFile for proper error message formatting. * @param dict the sequence dictionary * @param index the index file - * @param indexFile the index file - * @param rewriteIndex should we rewrite the index when we're done? - * */ - public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) { - if (dict == null) return; - - SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary()); + public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) { + if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null"); // check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set - validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict); + final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary()); + validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict); - for (SAMSequenceRecord seq : currentDict.getSequences()) { - if (dict.getSequence(seq.getSequenceName()) == null) - continue; - index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength())); - } - // re-write the index - if (rewriteIndex) try { - writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile)); - } catch (IOException e) { - logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK"); - } + // actually update the dictionary in the index + IndexDictionaryUtils.setIndexSequenceDictionary(index, dict); } - public void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) { - for ( SAMSequenceRecord seq : dict.getSequences() ) { - final String contig = SequenceDictionaryPropertyPredicate + seq.getSequenceName(); - final String length = String.valueOf(seq.getSequenceLength()); - index.addProperty(contig,length); - } - } - - public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) { - // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation - if (trackDict == null || trackDict.size() == 0) - logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation"); - else { - Set trackSequences = new TreeSet (); - for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences()) - trackSequences.add(dictionaryEntry.getSequenceName()); - SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict); - } + public void validateTrackSequenceDictionary(final String trackName, + final SAMSequenceDictionary trackDict, + final SAMSequenceDictionary referenceDict ) { + IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index c88c7c3c4..10261112c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers; +import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -77,6 +78,15 @@ public abstract class Walker { return toolkit; } + /** + * Gets the master sequence dictionary for this walker + * @link GenomeAnalysisEngine.getMasterSequenceDictionary + * @return + */ + protected SAMSequenceDictionary getMasterSequenceDictionary() { + return getToolkit().getMasterSequenceDictionary(); + } + /** * (conceptual static) method that states whether you want to see reads piling up at a locus * that contain a deletion at the locus. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 1c76a21ea..a932d44ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -99,7 +99,7 @@ public class LiftoverVariants extends RodWalker { final VCFHeader vcfHeader = new VCFHeader(metaData, samples); - writer = new StandardVCFWriter(file, false); + writer = new StandardVCFWriter(file, getMasterSequenceDictionary(), false); writer.writeHeader(vcfHeader); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 1fefd20fc..fa5093839 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -75,7 +75,7 @@ public class RandomlySplitVariants extends RodWalker { hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); vcfWriter1.writeHeader(new VCFHeader(hInfo, samples)); - vcfWriter2 = new StandardVCFWriter(file2, true); + vcfWriter2 = new StandardVCFWriter(file2, getMasterSequenceDictionary(), true); vcfWriter2.writeHeader(new VCFHeader(hInfo, samples)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java index 4ae87ddcb..71ec4ce1b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/IndexingVCFWriter.java @@ -24,6 +24,9 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.Tribble; import org.broad.tribble.TribbleException; import org.broad.tribble.index.DynamicIndexCreator; @@ -31,7 +34,9 @@ import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.util.LittleEndianOutputStream; import org.broad.tribble.util.PositionalStream; +import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.*; @@ -41,21 +46,24 @@ import java.io.*; */ public abstract class IndexingVCFWriter implements VCFWriter { final private String name; + private final SAMSequenceDictionary refDict; - private File indexFile = null; private OutputStream outputStream; private PositionalStream positionalStream = null; private DynamicIndexCreator indexer = null; private LittleEndianOutputStream idxStream = null; - protected IndexingVCFWriter(String name, File location, OutputStream output, boolean enableOnTheFlyIndexing) { + @Requires({"name != null", + "! ( location == null && output == null )", + "! ( enableOnTheFlyIndexing && location == null )"}) + protected IndexingVCFWriter(final String name, final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing) { outputStream = output; this.name = name; + this.refDict = refDict; if ( enableOnTheFlyIndexing ) { - indexFile = Tribble.indexFile(location); try { - idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile)); + idxStream = new LittleEndianOutputStream(new FileOutputStream(Tribble.indexFile(location))); //System.out.println("Creating index on the fly for " + location); indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); indexer.initialize(location, indexer.defaultBinSize()); @@ -66,15 +74,16 @@ public abstract class IndexingVCFWriter implements VCFWriter { idxStream = null; indexer = null; positionalStream = null; - indexFile = null; } } } + @Ensures("result != null") public OutputStream getOutputStream() { return outputStream; } + @Ensures("result != null") public String getStreamName() { return name; } @@ -89,6 +98,7 @@ public abstract class IndexingVCFWriter implements VCFWriter { if ( indexer != null ) { try { Index index = indexer.finalizeIndex(positionalStream.getPosition()); + IndexDictionaryUtils.setIndexSequenceDictionary(index, refDict); index.write(idxStream); idxStream.close(); } catch (IOException e) { @@ -108,15 +118,27 @@ public abstract class IndexingVCFWriter implements VCFWriter { indexer.addFeature(vc, positionalStream.getPosition()); } - protected static final String writerName(File location, OutputStream stream) { + /** + * Returns a reasonable "name" for this writer, to display to the user if something goes wrong + * + * @param location + * @param stream + * @return + */ + protected static final String writerName(final File location, final OutputStream stream) { return location == null ? stream.toString() : location.getAbsolutePath(); } - protected static OutputStream openOutputStream(File location) { + /** + * Returns a output stream writing to location, or throws a UserException if this fails + * @param location + * @return + */ + protected static OutputStream openOutputStream(final File location) { try { return new FileOutputStream(location); } catch (FileNotFoundException e) { - throw new ReviewedStingException("Unable to create VCF file at location: " + location, e); + throw new UserException.CouldNotCreateOutputFile(location, "Unable to create VCF writer", e); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index 7cba5fc3e..0da7a100f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.Tribble; import org.broad.tribble.TribbleException; import org.broad.tribble.index.DynamicIndexCreator; @@ -62,21 +63,12 @@ public class StandardVCFWriter extends IndexingVCFWriter { * * @param location the file location to write to */ - public StandardVCFWriter(File location) { - this(location, openOutputStream(location), true, false); + public StandardVCFWriter(final File location, final SAMSequenceDictionary refDict) { + this(location, openOutputStream(location), refDict, true, false); } - public StandardVCFWriter(File location, boolean enableOnTheFlyIndexing) { - this(location, openOutputStream(location), enableOnTheFlyIndexing, false); - } - - /** - * create a VCF writer, given a stream to write to - * - * @param output the file location to write to - */ - public StandardVCFWriter(OutputStream output) { - this(output, false); + public StandardVCFWriter(File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing) { + this(location, openOutputStream(location), refDict, enableOnTheFlyIndexing, false); } /** @@ -85,12 +77,12 @@ public class StandardVCFWriter extends IndexingVCFWriter { * @param output the file location to write to * @param doNotWriteGenotypes do not write genotypes */ - public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) { - this(null, output, false, doNotWriteGenotypes); + public StandardVCFWriter(final OutputStream output, final SAMSequenceDictionary refDict, final boolean doNotWriteGenotypes) { + this(null, output, refDict, false, doNotWriteGenotypes); } - public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) { - super(writerName(location, output), location, output, enableOnTheFlyIndexing); + public StandardVCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) { + super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing); mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size this.doNotWriteGenotypes = doNotWriteGenotypes; } diff --git a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/gcf/GCFWriter.java index 7ff6e27a2..18fae18c4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/gcf/GCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/gcf/GCFWriter.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.gcf; +import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.utils.codecs.vcf.IndexingVCFWriter; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -52,8 +53,8 @@ public class GCFWriter extends IndexingVCFWriter { // // -------------------------------------------------------------------------------- - public GCFWriter(File location, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) { - super(writerName(location, null), location, null, enableOnTheFlyIndexing); + public GCFWriter(final File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) { + super(writerName(location, null), location, null, refDict, enableOnTheFlyIndexing); this.location = location; this.skipGenotypes = doNotWriteGenotypes; diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index 386c17659..a1817e3c7 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -75,7 +75,7 @@ public class WalkerTest extends BaseTest { Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec()); Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath()); - if ( ! indexFromOutputFile.equalsIgnoreTimestamp(dynamicIndex) ) { + if ( ! indexFromOutputFile.equalsIgnoreProperties(dynamicIndex) ) { Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n", indexFromOutputFile.getProperties(), dynamicIndex.getProperties())); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java index ae218e898..724c343e4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java @@ -29,7 +29,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.Tribble; import org.broad.tribble.index.Index; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -45,7 +44,6 @@ import org.testng.annotations.Test; import java.io.*; import java.nio.channels.FileChannel; -import java.util.Map; /** @@ -164,7 +162,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest { try { Index idx = builder.loadIndex(vcfFile, new VCFCodec()); // catch any exception; this call should pass correctly - SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx); + SAMSequenceDictionary dict = IndexDictionaryUtils.getSequenceDictionaryFromProperties(idx); } catch (IOException e) { e.printStackTrace(); Assert.fail("IO exception unexpected" + e.getMessage()); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 1809ab778..55bd4783b 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -1,27 +1,45 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.Tribble; import org.broad.tribble.index.*; import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broad.tribble.source.BasicFeatureSource; +import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; /** * tests out the various functions in the index factory class */ -public class IndexFactoryUnitTest { +public class IndexFactoryUnitTest extends BaseTest { File inputFile = new File("public/testdata/HiSeq.10000.vcf"); File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf"); File outputFileIndex = Tribble.indexFile(outputFile); + private SAMSequenceDictionary dict; + + @BeforeTest + public void setup() { + try { + dict = new CachingIndexedFastaSequenceFile(new File(b37KGReference)).getSequenceDictionary(); + } + catch(FileNotFoundException ex) { + throw new UserException.CouldNotReadInputFile(b37KGReference,ex); + } + } + // // test out scoring the indexes // @@ -37,7 +55,7 @@ public class IndexFactoryUnitTest { BasicFeatureSource source = new BasicFeatureSource (inputFile.getAbsolutePath(), indexFromInputFile, new VCFCodec()); int counter = 0; - VCFWriter writer = new StandardVCFWriter(outputFile); + VCFWriter writer = new StandardVCFWriter(outputFile, dict); writer.writeHeader((VCFHeader)source.getHeader()); CloseableTribbleIterator it = source.iterator(); while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) { diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index e3a926fb9..a8e6593b1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -38,12 +38,13 @@ public class VCFWriterUnitTest extends BaseTest { private Set additionalColumns = new HashSet (); private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf"); private GenomeLocParser genomeLocParser; + private IndexedFastaSequenceFile seq; @BeforeClass public void beforeTests() { File referenceFile = new File(hg18Reference); try { - IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile); + seq = new CachingIndexedFastaSequenceFile(referenceFile); genomeLocParser = new GenomeLocParser(seq); } catch(FileNotFoundException ex) { @@ -55,7 +56,7 @@ public class VCFWriterUnitTest extends BaseTest { @Test public void testBasicWriteAndRead() { VCFHeader header = createFakeHeader(metaData,additionalColumns); - VCFWriter writer = new StandardVCFWriter(fakeVCFFile); + VCFWriter writer = new StandardVCFWriter(fakeVCFFile, seq.getSequenceDictionary()); writer.writeHeader(header); writer.add(createVC(header)); writer.add(createVC(header)); From 08ffb18b96b1d22f14c488cd71a1d02b5490f6a1 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 20 Sep 2011 11:02:51 -0400 Subject: [PATCH 23/41] Renaming datasets in the MDCP Making dataset names and files generated by the MDCP more uniform. --- .../MethodsDevelopmentCallingPipeline.scala | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala index 80bfe03d1..637d3edca 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala @@ -98,40 +98,52 @@ class MethodsDevelopmentCallingPipeline extends QScript { // BUGBUG: We no longer support b36/hg18 because several of the necessary files aren't available aligned to those references val targetDataSets: Map[String, Target] = Map( - "HiSeq" -> new Target("NA12878.HiSeq", hg18, dbSNP_hg18_129, hapmap_hg18, + "NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"), + new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"), + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), + "NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"), + new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), + "NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18, "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask", new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"), new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"), "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 2.14, 99.0, !lowPass, !exome, 1), - "HiSeq19" -> new Target("NA12878.HiSeq19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, - new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"), - new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"), - "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), - "GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, - new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"), - new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.GA2.hg19.filtered.vcf"), - "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), - "WEx" -> new Target("NA12878.WEx", hg18, dbSNP_hg18_129, hapmap_hg18, + "NA12878_wex_b37" -> new Target("NA12878.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/seq/picard_aggregation/C339/NA12878/v3/NA12878.bam"), + new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1), + "NA12878_wex_hg18" -> new Target("NA12878.HiSeq.WEx.hg18", hg18, dbSNP_hg18_129, hapmap_hg18, "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask", new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"), new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1), - "WExTrio" -> new Target("CEUTrio.WEx", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, + "NA12878_wex_decoy" -> new Target("NA12878.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.NA12878.clean.dedup.recal.bam"), + new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** + "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1), + "CEUTrio_wex_b37" -> new Target("CEUTrio.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"), new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3), - "WGSTrio" -> new Target("CEUTrio.WGS", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, + "CEUTrio_wgs_b37" -> new Target("CEUTrio.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"), new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3), - "WExTrioDecoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, + "CEUTrio_wex_decoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"), new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3), - "WGSTrioDecoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, + "CEUTrio_wgs_decoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"), new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3), + "GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37, + new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"), + new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.GA2.hg19.filtered.vcf"), + "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1), "FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37, new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"), new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED ** From a1b4cafe7a63ecbeea9b06a94445f6bbf925d5e1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 20 Sep 2011 13:59:59 -0400 Subject: [PATCH 24/41] Bug fix for NPE when timer wasn't initialized --- .../broadinstitute/sting/gatk/traversals/TraversalEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 27fd173cb..c6321e2ad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -358,7 +358,7 @@ public abstract class TraversalEngine ,Provide public void printOnTraversalDone() { printProgress(null, null, true); - final double elapsed = timer.getElapsedTime(); + final double elapsed = timer == null ? 0 : timer.getElapsedTime(); ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics(); From 827c942c8027d7888b29ddff8399834b7bcf94e4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 20 Sep 2011 14:01:14 -0400 Subject: [PATCH 25/41] Rev tribble --- .../{tribble-24.jar => tribble-25.jar} | Bin 299210 -> 305986 bytes .../{tribble-24.xml => tribble-25.xml} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename settings/repository/org.broad/{tribble-24.jar => tribble-25.jar} (89%) rename settings/repository/org.broad/{tribble-24.xml => tribble-25.xml} (51%) diff --git a/settings/repository/org.broad/tribble-24.jar b/settings/repository/org.broad/tribble-25.jar similarity index 89% rename from settings/repository/org.broad/tribble-24.jar rename to settings/repository/org.broad/tribble-25.jar index b1c39e60a14ce895e515cadb07c356d6d428bbc4..6d467ba3fdd6e6285e8244a87aefa4a0d232cfe1 100644 GIT binary patch delta 11585 zcmai42Vhjy(w@2ZW_PpM^g;qD8$wM8J(N%qdJ_y%LrVe#5&|UjCRvas1PHK_qZBEE zfYKvb5Gjh~2_ond3rc(XY)=tTo=P$Q%(*waS>*r!g>&!RGjqO~GiT16bMML7Z6R-c z8seYPN7bwV7J!&RE!X-tP^0PAW`mltGj{EzIC=ZMF5jGSn9TR`CvmDGE8bQg%8Iu& zyR0bx&Qy%DCePAAmBenpP`&1)?RUNAXwWuYctmIyC7AHi_V>?tZifoMCDoZTGTeDO zqIUUr;jifpUW~f0H`o-tPOoWZU!fBx6FTZdWj$3Vd=m4_(zc>zZKMIOHXCY{gsJ_@ zI?REN`BJ33R`<^|DSNkfGbz%rPi~+j)($(R1h8ql=ae=+OWrwa6 1j7{QH)%)IQeg3`&f2W z3@iVwBoLh!lES#!X _{)!7%MU-_U%_CfdE=TYNJHJ$MbXZP zpQFofZM0aq59J3+rs+i8vVTgqi0JN0f+(D 8yO>}yv5sl^l`X$ZjW@I=-8F;uNP|8YWO?~TYBH_ z8%x0vt2%?*M@k9j{!3BiewP~Squ&1Wv62u!#cK)9(u2Ov3t#GE^wpOi+oUarzQ5_q z1Px8Xx+5m%xjR-1+iq{avaI~dopy>(5okEY8FROlKi9vfMI N{4)^Z^j0U)6hz+M84EvW)fc-F(a8Tjn$!D zdkwT=6Ku*N&A6^PKepgYOBPPyODjKYjcxp}EvE9Nof$j$K@03?#x%aCTd@;%=1UhV zIyzxj^8W&0#BSWWyBT{}u%{I|U_&$Z;;P C@* za2!L8x59ADGh;qGZh{31*u@jAI0+{+R-pxpd?6pFSa2%er v(TdIJa31 G`?Ni>+%pv+pHfK>6Z(yOrPt2-a@1$K)1G&o7)z>KB%Gc(_i% z4ClqlS!U`;z8Rkpw;L* MK)LgB?z_&IWu#b yEYI+pth1 zQF>6{AtHDK)^a-T`mbMueuTeBbh?W18ompM#7+y=Grh-S_z&?B(SBsnSv+UM5AeK* zucx#YCoD>Y*!q!b5_7JrL9Se1{Km&I69Q=Vl1&uWzC6HmuL<*^7!VvpD`2AM1+dWl ztkCl_Ue6kb7RhQHQSwe 1va7FqDCyr{}5ofD)LFRQ-a{?uj6;cB%hg zyA4LWtwCt-=+W1%TDw4Reg0D4M%YAJodEb>Fau>r?C$z22)kl{wwLTufw4wIq?i|i z?Hps^Ll_G`KrZ|T^5DOecxu1|jG}Z?A0}gGD8#NXRjRb>ScXbMh0X9HX{v{I*h1Y_ zWVeIB*Btc#nww#3Ty*2a 4-i8w#4HQlj(xk zBM|2yBM*_VA}y{*(E%` b=b6g^ESc2a53JfqRhFqI8>$uOWB zwO0topLEzWlUxw*g4zy`7OUhgqVZfTks@^Is!oGH?2wVkm^%&3?j*yQR_atjV$u-^ z^xy@s( N;TSR2({5X}c0NWsgjZoN z3zOm|h5 V} mlo0xU`1slOC8SD6O27^7>$! z+=FR?*NnhC0u_dl9mpz HdUq@23ch^w#ylKHL~cORoZ!Vsm2)0jw-AgakR zXo}D;`4}|g+?-5AbA;2-B2`O@(vCq(en?>ieriQe4*9sN@*?+6lXD=I7A=6wWawA) zf_jCL(N&0qYqU;Wha_64Tfo1d9WBaT;9KYi-$5aKPfOYlG(rC(z0i_6SV$A~IGiBF z;Us?&P7xY)+C2=VWXGE{Osa#{skbPzDR71ac%{*szD-Se%Gp49NLx%ZymJql!@E?Y zk<5D}Pn!k9r?FJWJ}pipr?uA8E#!uz)03lUWZDneRr$3iK01@n z0DdFy-K8}CUxp+5XU^Z!q14fmiA+=mSK3x-0a14hz39g9edlR^uU3Ug@#Hy=%I zSLQ=7oTZpm$uoX%jwp=+)(#&?uTihvPA!J>6qIQAO(h) ~ZLsc^rDt)}wbw zp9*+_cu!P7Ul;T{)rdPt }f{`s!DHq4ce8XO@+9WacFPU%HUO!MC zK#a^N(cth*4dfRM-Fh5HwsT5Z$JP(v`0pS$ZqY6ncmyK#R8XJN_Mi$FOx~784^e=- zNlAAoLb9F}KBu;syWokw$6>g8C`QmQ=xs+jXgC_VVAQGqO`wWT8Jv)!v{Q$qTRTBd z7)AzmqK$2qygy6Vp0xMpyS_}xG=u}lX@lt7#$brRq4a8>2@P- l!|iTS*l-) #8NB2rxY$3+_Q;cQbsSWriQ9{Ah7ho8uZSAXS@b+M7wR5r-i=B**6jSTj{} zZs34S&bk~*M )B86GsU{bua)`?#JjWsrZ0|g{W#>)Y@5(mH}aA7 z_)N9+*jRua6g-p$5K?`jeBOmEQ9j3@0HKvhZDp=nqYa5;Fp |ahn<-0QYn1sKyT_1?a%;C^Snykpc(r3COA8CiZ8h zPq (}uoJXhk^>a0Xa#Ci!d@&Br;AhI1*Bo`ya+ zA7)`OEWrg7MGIjwE`l#{30%cx(x>CdTeD~;-g3Zga$OGX>3Nf2p$K-u9SSZJEs|^C zSE9`H=Ch3!)4Mbw8^U2)PI+4qPE)mNTXEeTaMx*}{ey xdW`FU>nRnKNkEz19W)44i #}o5~@Y?MAsD)!z>4QkXOX1?k9yuD z!Cdh5d@HBVq<(_>qtNC_d@@6J@D1M5&5+bG;fVze^pvj{&Z(4Y9pEs&Fkz!mtGX zYVF$44_o=m57Sr2eV4wSJiFa>WjOBszdk;SBct%4RJEYhmp%iiYXgwa2NJ}sR5Xi{ zR!X4j&nz4$4`^H&IXK8i>e@XH=lMyh-c }*$eXUDmge}tEOB$e|SCWc6A^$&PX2Oe=3zmUqrsBKC@*&Hp !hq8|T`%O}XPI zsWtnQSM|PjJg&rB Iy9< Ry7Amx%pGoVe&Y~ zb$O2ZsXof(OVkaznHg);F*-`cdi6s+;FbyXrMl99 aV&}$}g)O0;N;phwuKmn5JK`ML$ovepU?-uddJn#e=h|udDr!szbN*
t z*>h8}Xxe0{&YZiE;$Vpu>YAWwh4S3U^_5xsK)3nTQ0=fj`LZLlhq|TNk=k9oze8iR zN}W5*u9fQXMQ2W}q@{IhYk%ls_WD|xJOXna>!5w4yJb~3?YypRWH0TlKsi#ktlbuc z(7fc*Tr3=`))eW9nkF*7QiEMtnObdK!{T9DTdCpnFs-jirE^*ll%uubQ##QnM@wU# zSe2tSk;JJSO-=^!dydvY*0dh24Uxp!(OPdwTpg`FDT$_Iw5KGoWQ>*}iOJBuV4CL?l~)4KZ^uGYne$M%JV_Pt zleAa~yn3QmOS!nyU<5d?m-vZSiZq*@PY;zjWq*~>fh-p~M0EdlEbM;JXb~=wCut#4 zv71dqP4`0({M;$}w7|()w3s(ZlLyOU#U#=?P_K24dO4&~MjuqdR7wyu9k>%0SFE4a zm0pLrG;(9n;rha*pE6debWg|01QT|ZE8@i5$ ivDqE!bnV(%0}_c|n|F;4PoGAmG`Q %P3Rl=(YA%XcPX^4L=6m zu9$nDUZT0Ad7=2v)8&I2W4xG4@)D|aIzi zs1o9G%78;RO308?_{UFPFnJfk6;!p zwp_Y+Np=5^j(Y1yLM^(6zQQp}3zLmr?{j#-hr#simP#kDMzOaaZFFlE*~6Mn2Hg5! zwjZ6(P-)}UsE;awi?uM3aSiQa@NC1O1+%GZQGq%u{SS;4uUD}yFzarZWYGq!CT+Jw z^c<>d9 SsAQ1Hta5*( cr9RH0@H{+#45c z09*!miPjTA#k4M*o=^FE)O@YJoyCWL)?h& l$YrJPjt~=W++i&qC=}E<-K;(Uh;8QJo04@BXn>9`53=5l>S5U6OQvVk{Jt( zk(AM0_yhB6$xXM|T*cbUP4^nC?1fsmXnawz%OH@J)I4+a*avj(z@?>^C0B0hc+{xA z4B! pqiubEn zSBa$@?IO|_Yf*pOO00Nyq~9}?(zx{Sa>UQF#USafpI$mMs(m~Fm)2fVpY&qf7NX`d z!_bF|Nn36K*ESGE9n?T^v%hNko8AG5Tl|(#uIJ*ts`i?~ZDYm$;i|8AWr+cMY6-#C zm@7L`Hd>7lTe9hIAMDGF<>P9^pp kR3Pd%4Tdb@wy^AY4mF3r7c@P9>bV{d}t z;!2hLZ%d=gSJ6uWmwI0E(+;@heMO5^2L6c)4$P`cZ#PsLddYV^q|4vhpxVT<&l;WF z{4AOK vRSov@Oe`^sc9am<3-MLF-D `-^p}wGe4#(vGb=R#Cd=;vIxDhj<#8 z#q}GikNAOFNoW4Wh`qC_3%t=yvx)o5HJgNeKjU1FVtS9_;tgAKO!Zc`t`qF@r3UQ2 zQi8oU)gz$4RSSBnHi^GVjb*}r4M}yHZb+4`(afUv8iSX~y!q95$pOpgPfT)X=W}i@ HISu{?jA+l@ delta 11458 zcmahv34Baf_vgI#GBcUH?29C_Oax()NP<$aQ%VW3v}uZx7ENr4t#)IJonXjqZL!l$ z5i(Sj?u%0T@2X$R*Y;ETwW#i@&HtSHUS=}d-~T_{_s+X#z2}~L?s@O#m5(Cc*&Y#` z-$&K#05*U@4>VmH+)N!vx4SpW+lR^W_G=TBaPDNx H&-=zZV5-nz`aqd$wjYOGC zvs8Z{z29Kj8uy36vN>U$(Uap?ZU8T(b~1q4rm6u1IE$^)y0V 6q#ctFYAbFZ=HE-IMJhJZNP` H ziyy18V$Bk*VRgUCt5`kr(6hlT!rlDTT~cPzsjL_YxIbE9*q!plRRajV{FKz|K6b6G z6kB!Q4|z%C%Ouqu-XT_8U#2y$?tHzO;hHfwM@qnb{^vGQp#7KkrL~@m3SJB3k+9vZ zY;0g!NcbdAP%ySv0%n4SvqYazY$J6X`F-J)b(sL`GDXouGCeFBBgG4$80tC~2v!UV zs5O85r1K@%h9OoA4a6`Ew_$`8BkdT4(Ts|*VFNqHVjMrj+aU!T+AzU}iFQoFWIMFs zX9puwY}knJsdh-_&c^)E#D-1nV8=9ma5b}HbH=7w(aECH?bMsWmo)Cq SoYaC8;)l$Pq1Stma%||Hk=d)V{oz!r|^BM6(6_LJuL{QVOzfB zT5-A^Cg2Pk&a`wX1o^Y$tO%TK!#Q@Gi!PQn&x-TK#b8VlRo%2ko~Q`CSBZ%pHF0Xu zxXHaAn@~Dw)Zmiwqb5%&8eir<-!@*XjKWk5^&E)8eM =Lw0$K|*}hea?)R3)M_Y^9E?aJ3HQKx7rLMSPlw4_nr7?^@9@33JrtL^?4E zn? fxVVY~}LA5-*rQC3N5 zR?m`gqjcB-yZEw4$93q|aXoC&aRWT3V &~Yb_kX`7u;% ;!$)|4F9&rzgw F^~zz}qC1M9^R?Xtm-CIv%G1!`H0rMb>=+ zc8Z`BY=SR|oD`fNbQn)E!zl!Z$d5-ye;sDREQEyK6Go1j?igJ<$ sa zj* zUU{eZm3InZd8ZJhy8;w}BySZ!Ha)B+Mv6P5gPzxrg0)0gXFz*MC3qdU>AGJ0-W(gX z+(1$+^a#)nD(R|AI2;5#K~GOo3jigw3sUz+lP}Nd$Cvy6@oh5swmZ?$$)~S MPuEH4jFN}v@D04M{QjCL1m XjD z={O!{U@6Tdd&XgyU|LGXz*eXt1Dj$W*hbz`q_@JsZjHMQEv>LUIiZC!)9EA 48Ki9)VD26evdkjhp6-upEH^=OIWs0=n}!Bo}(XaRg$0V8esNuE-HJ zr9cA;3=LWl)EdIzE@%L4Ad$kVF@;hV>C2@+@=a`eGT+5B%SP=ojp{)H6zU5mB`JmJ z9%$r&RG${RM6ROHSS=;R7{qmsfMD1ygPcX~F-7(y8Ww4%VK#QA(=dJVdW5hiDKIq( z#Xt%UHqQ`h_X@R>(8VN-h3++l3gTm--C3yU+5%g cw zXkrp5dK<$g$bik{r!CNpCeNp~yH{;@*hd_!_9-K3NnH<8%2O6b(WTSGYy+#d7c`Y3 z%S9IXB6@E pOv&;tY*6Kbr)XX?obFy$aT3(t9( z4TA!l;|~KqcL(?L|H!@nAGi;Lb|-W7KQccL2UxYZl!Y;AzVUkgk@z4SVq$mE(U_4w z+T;I7TW!$Z;SinIY<-8E{72@)MgY6#FK*ysMMj*yNeSyMNpFOd#XGkaIAX?W4R}mf zb|gI II7mm`Enk{z+E~6>LW$0*JXHqIt*kgc@+BJ8OVUr8~u= zc_)P#HQ>tRKCFQjeAmo-Kn=+L7?YOPOcsGeK~VNF$U*3rc??=2t&>awS|OZ**11}G zoOTTE;)gaYfuGveJ?*VM$Ai;k^ykvf2egr^F1SoDj^9uox&pD3U{m2+NT(#*3ciE( z@I7>g8_*AafKs?gyV8%8PkxqOXiWnwr8W8@yhI|05dBHg%1NTa>+%YC8D61TQ)z#D zmADmnjqto(8Jw>Zr;Asc4$7Y}1g-GKEocdEQjbPNZxI7kAf``~sE>U**GZ1N+sL$( z`D&mYB|px|1m^Nw8h4HF?Km&foycR@^OhYDMfaG9_H!fJZv&e%-Q$7|60;qeF=5Aq zMM~{fNMPoEe96_=>6vjfGaU=|)_&!Sk8b2MfWOFlwa|bPLoy $~gn0E6T;&dB&9J&_L98l%Z%>&) DJ$@B4P6(cuFp(eo$1aQDoE-&20f8A`6+566;Zn5(lE%YDah1hNc|V(9je_) zHt#>> )un)D=*_sNIDL^3y;HnRCV-;=vxDMgv+aeejez5GK2NjOJ~@L zVx=>LVHZfJs)@E!7X_J=>a`)$v_XdyxCocXY@Re5(1fL1;ooEpy&F(3+q8@B99->r zr1_k|XqwDSMbLm_Fc2Xx!^~6-2#}eoSHH~kV=zd8y|u2(zF>>t1&( ^0u`4K?Qi2lynObGL5Y83GqI(7y9NMhoRo7 z7)H}z@IFklk>P>iC;tKOV&Pr_%HSj^PCI!}y0t6xqSOk|6^dbmyf39|FIZ0R#&z>5 z(-00Lrwyl%C?g;SAAv-C6q?~kN=2jSo5g7QR52cU-~{@XP)wg7N}w3WP%0V=OL4pl z)>2u_byX*Nsc1$;o#A0Sw1xl3Sno}# qct$nNf~%l zUnU%_Ny7BQ@Yqp^G|`MG_99Ar2o5h}i5uZH1yXS;G{?uG8%~2EE}UMs6xk4SRLyBW z`yQ@QtZV!}YOFx~nMz;!jtG~I!x&m^WAh9n@7RxDt+yY0H{f0}3*-jG1-t=OaRJ9* z96}q5+QvHi4w7Rq9wFBjXEWDKB13Y~Ovnw43#@_CjbK@1r7V3hqwp}4L|e;?jz9^+ zk40M;eyrh>(2?CyW|e-DC4orH#Rjg4(bgqnqb*BHkHTcrZ+mNVWs)$;f;N=l09|0i zdE}z`wE7l8XIw;qvzYd?CG?856v}Zq#moxWf~(*&`q=UXu9Y4fO}?2%>+T1t6at}$ zYIm*(Y!t3;svG7|bh-M_>+MejTIm&MI~5YY(1LGH_2jRzh=``$zOIP4 $!3V$J& z8UT)4pwz TBCd>X%k5^$7nmJ$^jTL#o`xzg&M`747b97D456(%g1tP zF0XU4FZI*MJ(5J9iY)Y{$oDGK&K#@78+24j@PC p zwD7k{fkrN`zn^eX!P}@TDtNO)RH#5P(sxTjG1A7ekHS=+sq(D`>%Se@Oe^Kqgqo)D z2QDhqkiQMF3@UZ%7B;^?^DiV4tViMTYACW)L(;)M)P=JAly*ZCijxdbs6L`4N)^%! zy2sOC$54Pwr=qYmZC58Of%K_!6g~!lzGDvhxb3;s0nZ2UCveZ%Za7Px3V8bVz^GLD zf%~luUq63M2e_si-{$E9w?%X=rEim)it#T!oIj6GUf-edc5*GZR57vG_&nr!XDTlG zKR -_SybYx}${p)hd_dIg=;Hu5Q)jan$4Iex9 ZL#?x=_Om7< z!nG+Z-~=U9ZfRm|ffg^;WNCKK;I`TsgR60_Rw23QxGYuVf1?K03pAei>+QAQ3{QO9 zMXQXFhAb@5&RZo^FkCxhn3VRY_KIQpcayZ?Ql#iNMQbO?mQT_0 #nj^ESz1TggR`{)2@IL7-6w&4v$g&bxHVfFD1rWSw0sHdn4=Al zz%O%5;_h>`F|y}C9gyhKM#-LXmsTW!t1hir0)yviqa|=+o;FDWE$3?`64*Ll8zh0B z=4-tq(0PG2OafI4OfK6(ZLsVazECTaqd2}$>neL{7n(K?SVR``0Cz9a9+H4kt__jE zh;r?INqn^29CF19t)a+VtTmH~_-5@R)9GLjxTTBHi?!zNuzfan$etjPzeF<*V~5k> zsuEl2J{TRv2d!yRUfBlfytZ4cU8co|ilv$*Bfk$SQ7Xxy!*EVP {v#7 zCbt%T;@+!c3U_zwyYwAb-TQeeWE#18{=Su2ZjR3gi_z^vx zE@e(wr8SY1zUqU5+K+NoKS{}zi1^nk(^$Vk#E<-qAxzGvRw8q?Nj _ LO{TE?cYD5G%qQ#{rry);8f5koeZ zzK}UKv?oi6;!;gKwZTk3se3-jT0?*T!p%P;-fNJH+VR++veJy(%Ui2DT_=Uy{NuLq z19j3PM0S~C5f4|=1oDu_SJLQ)6o~V5|C m|llp#rT}#UVJODVw1@lypd4%zv@LL`?zK>s_hvs z$|5ReDnUNHG >$Re`E_hgfU zqj&$f`!4)KJltCP@#cKZq)o-PVAUq_Hk IMe-KY+by;b{*Rx$cwNldVrn{7H2B8~TAJMa^T&t38T99( z)DUq!LbZsYdnuB6I7dGFu=X;=B)4q8;k@uyop}~HnUcI71)bg2{leh;uGgB2@a }-bkkO{G}USJUMf(X;*YK7Aj7H%WzRNHRIta35mn|Od?h`9)b#y!&EGV@OK%kT z;%#D*PWyu8xeC1a0CBP}Ijzw)Q|Q_DZGU^2Dg|!-3qNm!!89*Tvy0WU)kv|Nn559E zZ6tK`$a?sTb@JjCTv?ew#Sl0DEw*eD)91)^6L});)g`pSaP!|}&de}PT|@7=tWeUQ zZP&v^e`y8W?(&VCHts#;2K|~}=%q;-Nc)Ab?;r!auQ5 #_S|XfBI03WL(@GzhK!q z3jjC&a_`KmhLr3yS{3;hDFA=oZcfY9k!>TMrVPr>zv?>hj=>amRSg%XzNUb!*lh;v z``;yJT%#S5o4;50T<|hA5by0Wm81;W_x)TdLb+x6jqmbRFMVKTQ!P+r>^8^PbvKRi z;7tR4K-XfWdO7^9j7rl4?$%O7{#TR;y~8Q^w0#E`%{4dw%f+`<{H>8pA1Cr#Y4j1Ng%%*T)-gSKtzyIxDu1~7*H?pL>M=c>O> $?I?Gb>dmFvBr?q$~;D)`&!U%F7H~)Ex zE;LAoKdq%oQm?TDBzYe=-A#MQ@Js1b&FWYD@fLgjY0_KmqevD_pV2}>%IP4hnqm-( I_~7V&0me&b8~^|S diff --git a/settings/repository/org.broad/tribble-24.xml b/settings/repository/org.broad/tribble-25.xml similarity index 51% rename from settings/repository/org.broad/tribble-24.xml rename to settings/repository/org.broad/tribble-25.xml index 9b2b967f8..ed7a1fd69 100644 --- a/settings/repository/org.broad/tribble-24.xml +++ b/settings/repository/org.broad/tribble-25.xml @@ -1,3 +1,3 @@ - From bffd3cca6fe26fe31a6fa5a33745dcaee0139c56 Mon Sep 17 00:00:00 2001 From: Mark DePristo+ Date: Tue, 20 Sep 2011 15:07:06 -0400 Subject: [PATCH 27/41] Bug fix for reduced read; only adds regular bases for calculation -- No longer passes on deletions for genotyping --- .../walkers/genotyper/DiploidSNPGenotypeLikelihoods.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 5f6865d04..ec180f0cd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -276,8 +276,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { if ( elt.isReducedRead() ) { // reduced read representation byte qual = elt.getReducedQual(); - add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods - return elt.getReducedCount(); // we added nObs bases here + if ( BaseUtils.isRegularBase( elt.getBase() )) { + add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods + return elt.getReducedCount(); // we added nObs bases here + } else // odd bases or deletions => don't use them + return 0; } else { byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0; From d9ea764611c0e6b63e8b6e2022fafc52ee56bea9 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 20 Sep 2011 15:22:27 -0400 Subject: [PATCH 28/41] SnpEff annotator now adds OriginalSnpEffVersion and OriginalSnpEffCmd lines to the header of the VCF output file. This change is urgently required for production, which is why it's going into Stable+Unstable instead of just Unstable. The keys for the SnpEff version and command header lines in the VCF file output by VariantAnnotator (OriginalSnpEffVersion and OriginalSnpEffCmd) are intentionally different from the keys for those same lines in the SnpEff output file (SnpEffVersion and SnpEffCmd), so that output files from VariantAnnotator won't be confused with output files from SnpEff itself. --- .../sting/gatk/walkers/annotator/SnpEff.java | 49 +++++++++++++++---- .../walkers/annotator/VariantAnnotator.java | 4 +- .../annotator/VariantAnnotatorEngine.java | 6 +-- .../VariantAnnotatorAnnotation.java | 4 +- .../VariantAnnotatorIntegrationTest.java | 2 +- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 4ead77506..8f99c6118 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio // lacking a SnpEff version number in the VCF header: public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" }; public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion"; + public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd"; + + // When we write the SnpEff version number and command line to the output VCF, we change + // the key name slightly so that the output VCF won't be confused in the future for an + // output file produced by SnpEff directly: + public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY; + public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY; // SnpEff aggregates all effects (and effect metadata) together into a single INFO // field annotation with the key EFF: @@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio UNKNOWN } - - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { + // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff + // without providing a SnpEff rod via --snpEffFile): validateRodBinding(walker.getSnpEffRodBinding()); - checkSnpEffVersion(walker, toolkit); + RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); + + // Make sure that the SnpEff version number and command-line header lines are present in the VCF header of + // the SnpEff rod, and that the file was generated by a supported version of SnpEff: + VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); + VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); + VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); + + checkSnpEffVersion(snpEffVersionLine); + checkSnpEffCommandLine(snpEffCommandLine); + + // If everything looks ok, add the SnpEff version number and command-line header lines to the + // header of the VCF output file, changing the key names so that our output file won't be + // mistaken in the future for a SnpEff output file: + headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); + headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); } public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { @@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } } - private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { - RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); - - VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); - VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); - + private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) { if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) { throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " + "input file, and so could not verify that the file was generated by a supported version of SnpEff (" + @@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } } + private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) { + if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) { + throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " + + "input file, which should be added by all supported versions of SnpEff (" + + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")"); + } + } + private boolean isSupportedSnpEffVersion ( String versionString ) { for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) { if ( supportedVersion.equals(versionString) ) { @@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio List parsedEffects = new ArrayList (); Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY); - List individualEffects; + if ( effectFieldValue == null ) { + return parsedEffects; + } // The VCF codec stores multi-valued fields as a List , and single-valued fields as a String. // We can have either in the case of SnpEff, since there may be one or more than one effect in this record. + List individualEffects; if ( effectFieldValue instanceof List ) { individualEffects = (List )effectFieldValue; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index fb3dbc3cf..f6a1c4f31 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -208,8 +208,6 @@ public class VariantAnnotator extends RodWalker implements Ann engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit()); engine.initializeExpressions(expressionsToUse); - engine.invokeAnnotationInitializationMethods(); - // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones Set hInfo = new HashSet (); @@ -219,6 +217,8 @@ public class VariantAnnotator extends RodWalker implements Ann hInfo.add(line); } + engine.invokeAnnotationInitializationMethods(hInfo); + VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 68cd07803..e5effe6d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -114,13 +114,13 @@ public class VariantAnnotatorEngine { dbAnnotations.put(rod, rod.getName()); } - public void invokeAnnotationInitializationMethods() { + public void invokeAnnotationInitializationMethods( Set headerLines ) { for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) { - annotation.initialize(walker, toolkit); + annotation.initialize(walker, toolkit, headerLines); } for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) { - annotation.initialize(walker, toolkit); + annotation.initialize(walker, toolkit, headerLines); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java index 160a3d258..521f89016 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.List; +import java.util.Set; @DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations") public abstract class VariantAnnotatorAnnotation { @@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation { public abstract List getKeyNames(); // initialization method (optional for subclasses, and therefore non-abstract) - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { } + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 08baae7a7..2c06c6b7f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", 1, - Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9") + Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e") ); executeTest("Testing SnpEff annotations", spec); } From a91ac0c5dbb28667c38d69230827112af569ab55 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Sep 2011 10:15:05 -0400 Subject: [PATCH 32/41] Intermediate commit of bugfixes to CombineVariants --- .../org/broadinstitute/sting/utils/Utils.java | 40 ++++--- .../variantcontext/VariantContextUtils.java | 106 ++++++++---------- .../CombineVariantsIntegrationTest.java | 11 +- 3 files changed, 82 insertions(+), 75 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index f6edb319f..6ce492c63 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -240,22 +240,34 @@ public class Utils { return ret.toString(); } - //public static String join(String separator, Collection strings) { - // return join( separator, strings.toArray(new String[0]) ); - //} - - public static String join(String separator, Collection objects) { - if(objects.isEmpty()) { + /** + * Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of + * elti objects (note there's no actual space between sep and the elti elements). Returns + * "" if collection is empty. If collection contains just elt, then returns elt.toString() + * + * @param separator the string to use to separate objects + * @param objects a collection of objects. the element order is defined by the iterator over objects + * @param the type of the objects + * @return a non-null string + */ + public static String join(final String separator, final Collection objects) { + if (objects.isEmpty()) { // fast path for empty collection return ""; - } - Iterator iter = objects.iterator(); - final StringBuilder ret = new StringBuilder(iter.next().toString()); - while(iter.hasNext()) { - ret.append(separator); - ret.append(iter.next().toString()); - } + } else { + final Iterator iter = objects.iterator(); + final T first = iter.next(); - return ret.toString(); + if ( ! iter.hasNext() ) // fast path for singleton collections + return first.toString(); + else { // full path for 2+ collection that actually need a join + final StringBuilder ret = new StringBuilder(first.toString()); + while(iter.hasNext()) { + ret.append(separator); + ret.append(iter.next().toString()); + } + return ret.toString(); + } + } } public static String dupString(char c, int nCopies) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 986d6305c..147538253 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -316,19 +316,22 @@ public class VariantContextUtils { return pruneVariantContext(vc, null); } - public static VariantContext pruneVariantContext(VariantContext vc, Collection keysToPreserve ) { - MutableVariantContext mvc = new MutableVariantContext(vc); + public static VariantContext pruneVariantContext(final VariantContext vc, final Collection keysToPreserve ) { + final MutableVariantContext mvc = new MutableVariantContext(vc); if ( keysToPreserve == null || keysToPreserve.size() == 0 ) mvc.clearAttributes(); else { - Map d = mvc.getAttributes(); + final Map d = mvc.getAttributes(); mvc.clearAttributes(); for ( String key : keysToPreserve ) if ( d.containsKey(key) ) mvc.putAttribute(key, d.get(key)); } + // this must be done as the ID is stored in the attributes field + if ( vc.hasID() ) mvc.setID(vc.getID()); + Collection gs = mvc.getGenotypes().values(); mvc.clearGenotypes(); for ( Genotype g : gs ) { @@ -443,34 +446,6 @@ public class VariantContextUtils { throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next())); } - - public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { - return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GenotypeMergeType.UNSORTED, false, false, refBase); - } - - - /** - * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. - * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with - * the sample name - * - * @param genomeLocParser loc parser - * @param unsortedVCs collection of unsorted VCs - * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs - * @param filteredRecordMergeType merge type for filtered records - * @param genotypeMergeOptions merge option for genotypes - * @param annotateOrigin should we annotate the set it came from? - * @param printMessages should we print messages? - * @param inputRefBase the ref base - * @return new VariantContext - */ - public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, - FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, - boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - - return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false); - } - /** * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. * If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with @@ -486,12 +461,18 @@ public class VariantContextUtils { * @param setKey the key name of the set * @param filteredAreUncalled are filtered records uncalled? * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? - * @return new VariantContext + * @return new VariantContext representing the merge of unsortedVCs */ - public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, - FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, - boolean annotateOrigin, boolean printMessages, String setKey, - boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) { + public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser, + final Collection unsortedVCs, + final List priorityListOfVCs, + final FilteredRecordMergeType filteredRecordMergeType, + final GenotypeMergeType genotypeMergeOptions, + final boolean annotateOrigin, + final boolean printMessages, + final String setKey, + final boolean filteredAreUncalled, + final boolean mergeInfoWithMaxAC ) { if ( unsortedVCs == null || unsortedVCs.size() == 0 ) return null; @@ -514,26 +495,28 @@ public class VariantContextUtils { return null; // establish the baseline info from the first VC - VariantContext first = VCs.get(0); - String name = first.getSource(); - GenomeLoc loc = getLocation(genomeLocParser,first); + final VariantContext first = VCs.get(0); + final String name = first.getSource(); + final Allele refAllele = determineReferenceAllele(VCs); - Set alleles = new TreeSet (); - Map genotypes = new TreeMap (); - double negLog10PError = -1; - Set filters = new TreeSet (); - Map attributes = new TreeMap (); - Set inconsistentAttributes = new HashSet (); - String rsID = null; + final Set alleles = new TreeSet (); + final Set filters = new TreeSet (); + final Map attributes = new TreeMap (); + final Set inconsistentAttributes = new HashSet (); + final Set variantSources = new HashSet (); // contains the set of sources we found in our set of VCs that are variant + final Set rsIDs = new LinkedHashSet (1); // most of the time there's one id + + GenomeLoc loc = getLocation(genomeLocParser,first); int depth = 0; int maxAC = -1; - Map attributesWithMaxAC = new TreeMap (); + final Map attributesWithMaxAC = new TreeMap (); + double negLog10PError = -1; VariantContext vcWithMaxAC = null; + Map genotypes = new TreeMap (); // counting the number of filtered and variant VCs - int nFiltered = 0, nVariant = 0; + int nFiltered = 0; - Allele refAllele = determineReferenceAllele(VCs); boolean remapped = false; // cycle through and add info from the other VCs, making sure the loc/reference matches @@ -546,7 +529,7 @@ public class VariantContextUtils { loc = getLocation(genomeLocParser,vc); // get the longest location nFiltered += vc.isFiltered() ? 1 : 0; - nVariant += vc.isVariant() ? 1 : 0; + if ( vc.isVariant() ) variantSources.add(vc.getSource()); AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles); remapped = remapped || alleleMapping.needsRemapping(); @@ -566,8 +549,10 @@ public class VariantContextUtils { // if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY)); - if (rsID == null && vc.hasID()) - rsID = vc.getID(); + + // TODO -- REVERT CHANGE + if (rsIDs.isEmpty() && vc.hasID()) rsIDs.add(vc.getID()); + if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY); // lets see if the string contains a , separator @@ -627,17 +612,16 @@ public class VariantContextUtils { if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) filters.clear(); - // we care about where the call came from - if ( annotateOrigin ) { + if ( annotateOrigin ) { // we care about where the call came from String setValue; - if ( nFiltered == 0 && nVariant == priorityListOfVCs.size() ) // nothing was unfiltered + if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered setValue = "Intersection"; else if ( nFiltered == VCs.size() ) // everything was filtered out setValue = "FilteredInAll"; - else if ( nVariant == 0 ) // everyone was reference + else if ( variantSources.isEmpty() ) // everyone was reference setValue = "ReferenceInAll"; - else { // we are filtered in some subset - List s = new ArrayList (); + else { + LinkedHashSet s = new LinkedHashSet (); for ( VariantContext vc : VCs ) if ( vc.isVariant() ) s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() ); @@ -652,8 +636,10 @@ public class VariantContextUtils { if ( depth > 0 ) attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); - if ( rsID != null ) - attributes.put(VariantContext.ID_KEY, rsID); + + if ( ! rsIDs.isEmpty() ) { + attributes.put(VariantContext.ID_KEY, Utils.join(",", rsIDs)); + } VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? attributesWithMaxAC : attributes) ); // Trim the padded bases of all alleles if necessary diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 3267173a7..c6cf8454c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -96,7 +96,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); } - @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); } + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "e6a053129c5f7b13129beefed9282155"); } @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); } @Test public void threeWayWithRefs() { @@ -131,4 +131,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); } @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } + + @Test + public void combineDBSNPDuplicateSites() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132, + 1, + Arrays.asList("")); + executeTest("combineDBSNPDuplicateSites:", spec); + } } \ No newline at end of file From 34f435565ca294be93792e6998243116bf14f755 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Sep 2011 10:16:17 -0400 Subject: [PATCH 33/41] Accidentally committed unclean tribble jar to repo --- settings/repository/org.broad/tribble-25.jar | Bin 305986 -> 299110 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/settings/repository/org.broad/tribble-25.jar b/settings/repository/org.broad/tribble-25.jar index 6d467ba3fdd6e6285e8244a87aefa4a0d232cfe1..e260764a5b7e4c676c5b260010ccad0dd46becfe 100644 GIT binary patch delta 5549 zcmZWt30TzC7XL4oVR2^I1leR&Py_*&+;Gn_MZr=rm%vy0xn!c{D?k=CRCe&Mb5s;i zflzyH)Hj-IS xT>oN24|yW}7`Pcfe)PkSKfALJ#ZL@ppEa@KQV9iL%YUQbYvPZBnwz?G z5he5NhRVoPcXH*DWBNb2 9X20L zYP?^$M9}upxF|H~Iev~KGR4!6OGcJlb#h_v)3iUQ9l*a*^ia;t=u(qE>k3C)FfZSL z=B>*VJ7kK|Z|E5V=k#Kb;#8>X&|lWOUn<{sKwK%=^eop=5ON)%;gakDMQez6%|ug^ zSd((#Qv_9dp6HL5vrk00Bcoi*<#`8OZQ_jK#!if@9BLkbuw~=!P4K0?^pzFoXb)(~ zCOvD0xAfqxS#bXyWR&_xFCn1A 3{xxtK9x=O9ez+YQ(EHtPkUS *rSadX( z%oCsS=v$VFjhL7@BU#*t^J4Z&6{pzXc+4HYxFD>PAcVmlMfQZqm7 pl0J%0eH|2{(8*s9b9ij9++0lG(pNB4rObu%MW;4(jLk~Id@bh-||$}>dE`gX!oouspK7~3LxL} z1eS%91ALE$Mv}jI*w8-Y5y6SbC~>kmDhUFk5Gal(UQiT49L$pkkXr85Z7?b1b5{Q- z2;y$a_RkGk%MkK}cjEUE#DoWzc~v~A=bea8AhkTDkhjTKJoVd4NdiK`#1F_I%sl4< zGC?QP3kj~I5In}gEtL#sI*3mt5y)hx5 SeczOh!q i}EN;9gb(Z-PfgM3v$#0h2auWnbVI5A8 ziWR|C?QY8AGkSP+Ey05lX0D~E(~elx-TWESIwd-w%vKN>MQD(2oV1$SD~Szyct(-X zr)7e%b_RW{FzT!EIe*6Lqf-i+Tp=M_jecxBjh=pisY9{IW7$-25Cy)9e~TX&Gl(AH ztCQwjb)hE-qX8QDh|A31SBZV#26h=La$XJvGX2b2sGMuHf1y{pl9k)D>Fvj;w+8O4 z#mQM2_P7(oELRsLiY}v6aZ5$?fRtP{6kA_u!+lthDSEvS$3HsH6iW9jqn9*s&TtP% z$|DAdHi*s;nx}>ult;Y9*Wg|bal<aY|Lm|?eo``i8 z^F_5Wrol8aqhO7>(u11!!_MxAx)t Kbp0KJb^t&$y22?p|!ydBMDODrOqfb!}30 z`wq}}bySB5p0YiEYb_lhM%tPD;~_eX#v0$ Y|f*$WJIZctB^psLFeMf9=HNPEy- zhG{D49nU;sa3%bKLY!W}1mTdMrdE}~q)%2OFtpT2{h$+Ym5Hs~!1Y#*U%L}7>YZ|~ zVy{EkwNXv<5c3M%) lqxGRsT52HRB-`Z=;G#JVavK>s=qT0{v>+YIIFIz_1ad-VM z+MAw8j*vx +%3rEWAE;~SygKQ6eTh(s&-b(fG(5lx%+Ez8viP@7M6w h7boRST4Il z4s}A%vT_P4z0*P8V8QcTc`MI{zHu07uT%X}W*>#Uay{yKat*y<7-?^r`N?XkMfCS8 z3yAi0gT`cxqf^4Dzozt2S=_$pG5sxMP0(RGO|YSmt43^B-+K#~bj$`WEvH|wm}5-p z4qW=hXR_Y3LyhY7?_sAl(rAoAG*OQnQ9*Xl-b|tPh_qk_McQ=KstUy12YRi{UZ)cT zM%q{0{FB@?GKB7mxCT0akG83yo8B4VvWuRejI@ubnA6 WbB=3!JS`g9OZ|zLQShmW(1a^{s5O+S{9gi9Li>sx^+Rq%}^s&(-_% z!~?@)`K44Po-|dGK#03Wk3nlO@jnfT&Y&yJNP8|Uv<0z}1VCblY=BW!mcHv7UY}3j zSB$j94|Zkto)FiU(EqXYCN@xNaoW^enD{w;{V>wLq*~lMIK34~SFEi{-H66dXf-We zta=xi`iWZTLZ-Uu!&U5uA?&uOPB2xIA6K0-()P4jRQO0v4S=X>(i599tD4$=hJvop z_zJZxuO|E_!5hI#j!beF1V;3TvHn%8*c;3MRC~XXL J<2{m*Aw VBx;wA <2$RHp|_12CTbaz@I;Y*^gonJFs8#2yu>t1K-H6aNrwa z!~ydyU5 urfu2Sv|_f<&I zaZ#^)UlgT0I_i$RU{kKL%nfz(F;kMxmAT1|A5yneF9=LRl%$ECjB1W8c`4E}8Devz z5AF^$AJ`ZxiLQ$6mf}XMs>kii{1mTxKl8&acJ?d{^S=BHduLFhq|B1P^1*n%p6)*^ zvqU`26HB(>$|x^)c0 TZCN%MxE}}Pe=_3-$vZ-RE1J=%N*`B*wy#;{@)^7blH+p6+ z(OF)li)G#{Fj}@&jKm0UtrcI`VT4oPOfb!G5`-B}Y_aIb&%~CNfDPg*>_|)54zU{+ z$5K}z_BYC9i3UsXS@D7mYOgMeA#SLJ+z~6WnJv#Ai$CCc(3l!2h}MORPR-7MxJ-k^ z^`)3*keRmJSDMrc mG`liPG23?W` zxg`vg#^7Q>_XN?S(UM&=Z>$y%2T8R&gX+;zSEN!hR_ZFhTNYsHHCEc>j@qT^QmCJN zG239VSta#$LT%6n=>WC^OC^NHd@AW^$PUR~-k4>ubl)L0IH1=4AJQJaTD4zFf!K yu!{k1JXm!Qm%Nz1-VAtAU zj{wDkrj3&ImbJ0+7M@Alv2rC({b_>i#RJq&k!_vPozEP3JZ@IY{7m^GZ (8EB(xcQ0BrCBMG3h`-m-Q8-Jp=tp89h5;2s+HA}>VU_nR-ris0YKP}iS zJ6gt4d3G=+c E^nc^V}axuryXiT zO1yAvbZ>H&FG#yVq?$W>K7<_SK^=LIByc0f@uVkCm@u9U*U4~&(!GhK2Rnl3twa*V zFxoDObV4OQiF89nNg};b`BxGdi%N&dWGE_GlgS8FswR_nQ3*^YBT<=~Okz>_E?Fge zP9f8AWND-FU NGd9Arjn_sn5L3BsF3%`G*r^wCvm9!^L;V|71vZU z5tZ~*byjUE8HFR^)5thf3a62NsGOaq#vC+Vjc~zqG9IDx(@8ukooA5IsH~j OZs(~jMW0caz47T8#MvC)Q|QhdmbR?Z^) (;$F(N3(@8yj;{)P`c|REL zTpbTjGbUX$u7sP7Q(7nvRo-;X2aq9Ks%9*~&-x!1p}ybV`)mrkZrhKH79|gvKl|9E zrW;uhdRZsIlPvG1brQRyY3&qsWuj+8+!GsAi@~&RikeUb1HO5J09@Vl?rar!&VW}O zczwl>*MvFm*>m5=pR^DJCSe-q=Oh|Ck9fcnz*Tv0*OqQgQ`e;#OMvTC{bNa D@GN%ew4&8n}I~j)tX^5FDMD4x Tgd_+|+GzO4`g4AtIm8!RA$|^cjT*#9w=ELgX?3Q$o?kL(Oe%-e zCx{!i+qJ2;`u+`GOH8!!nC2l}dAQrADg4~GuGqxE5zPcH8tY$a-keNsJ+Y(c*yQ*~ z1~_)g=1^Ei(T#>p7WLF?t{PbAT) zRm%85|J8!I5vNa9K$|n^)HnftOY`h@vKjomTsIF~+LW__Pzui=H@YuF?XMRMI`tJo zC9_0AbH9KxMbB6BNSqI_SlI*z>=$)(ND9n`M-x59;K9Ff!+o!b9@KvU 3&2#gO!sFI51M;kG&U9ITibKDS3_Shc}ru;2Oivv z5AQoY{p}C ubQ)92QC+8bwDUgQ- zd}_!9Q#-q=t} kKFfP{CYs0sJX2I&55gp|Y>d%7rFMb aOEBVrWx#}t4d*bsis+ V0OqKlWJp%%QbU0cFxz!+QCOvD(l&N7=4( z$K;tw85#e1bES6iaxZ|3jEQy!^m}ccY)jANsYh^q9wgq{p2jYR&Bh+=G-4_7#5m@k zc`|n;9I{NboAaczl^^gw;y9W#>X8X{HIp8iB(}Nn$;B@uBeh+omd$SIf9)L+mSn z!dHNf?q%l@+ml%g|9k+$eQ3opb@XdCT92b$XmNz>LJiB+wQl-O5QOfU0M5U|tPDc4 zo0fxtlo@QGEq)@<`bGnBA=?~Qz~RdzRAb @{NG-E?aIa^`_B3&_ z>`u#9sIgr08r|(Ae6uonQ)4PDr`aTVrONkNdb_~S7LG9x?G+bM!1*yN3I9T#wh~-Z zg^xbW5?fNARqB#fGAL~uLLVAoMNKPY9W|^{*RJC#fSvx%XX2+z$91wJ&1{@r#PE!L z&G2e~urr!IY6PL3CZB=(IiG#z(AKNfNP4aYQ+F?NC~cTzPcs{#A_jeXsTp*7wVK); z2GOe=D*jZmqs~2KXZjhe64q|$8o;w|a6ESn3BuUU-7~bm5BrOWc8{e!wo+Yb{Tkwp z{J{9K`Wz@R6YV`)Aycssx(!UXphxe>dU|N3y78B-cfQdMu0JN)N!gBk62`l>0Xot; zX=BNT^aR*TOtcO4yT5f(v;11+Lw=hAIkept&c~VgYVEQal-fa!FMv4Huk+PiyXn3u z@gTf)m}p=0{$W k2*Iw#EooaPmRgP&F0r3-PQ!&(!!rr!RI z;aB_e`G|KcQWFX--CkM<8;6OuIp>eH@@;8u5ow8`Y%T&*LlZc3rid7@El!QBd^<-E zZ2_XS;+x#eN+*+Q UV dXt7F7m_Q74XfbhiUL~v&lHgOIG7E|%{2SyRp)CLa From 7d11f93b8270913e471ad92cc969c4c6257f9843 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Sep 2011 10:58:32 -0400 Subject: [PATCH 34/41] Final bugfix for CombineVariants -- Now handles multiple records at a site, so that you don't see records like set=dbsnp-dbsnp-dbsnp when combining something with dbsnp -- Proper handling of ids. If you are merging files with multiple ids for the same record, the ids are merged into a comma separated list --- .../sting/utils/variantcontext/VariantContextUtils.java | 3 +-- .../variantutils/CombineVariantsIntegrationTest.java | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 147538253..e0e27b4f7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -550,8 +550,7 @@ public class VariantContextUtils { if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY)); - // TODO -- REVERT CHANGE - if (rsIDs.isEmpty() && vc.hasID()) rsIDs.add(vc.getID()); + if ( vc.hasID() && ! vc.getID().equals(VCFConstants.EMPTY_ID_FIELD) ) rsIDs.add(vc.getID()); if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index c6cf8454c..74ff850a4 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -90,14 +90,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "d76cd5b3ced7745d42fe0af39ce0b32e"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); } - @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "e6a053129c5f7b13129beefed9282155"); } - @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); } + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); } @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( From ecc7f34774df37969a7d333d298bc803963c6159 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Sep 2011 11:09:54 -0400 Subject: [PATCH 35/41] Putative fix for BAQ problem. --- .../java/src/org/broadinstitute/sting/utils/baq/BAQ.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index ef7cf751e..1723557c4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -131,19 +132,18 @@ public class BAQ { private final static double EM = 0.33333333333; private final static double EI = 0.25; - private double[][][] EPSILONS = new double[256][256][64]; + private double[][][] EPSILONS = new double[256][256][SAMUtils.MAX_PHRED_SCORE+1]; private void initializeCachedData() { for ( int i = 0; i < 256; i++ ) for ( int j = 0; j < 256; j++ ) - for ( int q = 0; q < 64; q++ ) { - double qual = qual2prob[q < minBaseQual ? minBaseQual : q]; + for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) { EPSILONS[i][j][q] = 1.0; } for ( char b1 : "ACGTacgt".toCharArray() ) { for ( char b2 : "ACGTacgt".toCharArray() ) { - for ( int q = 0; q < 64; q++ ) { + for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) { double qual = qual2prob[q < minBaseQual ? minBaseQual : q]; double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM; EPSILONS[(byte)b1][(byte)b2][q] = e; From 174859fc68c434ee568884ea9d9e53541070b24a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 21 Sep 2011 11:14:54 -0400 Subject: [PATCH 36/41] Don't allow whitespace in the INFO field --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 624d06a71..18646b057 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -215,7 +215,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true); // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) - if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) || + if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) || (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + " tokens, and saw " + nParts + " )", lineNo); @@ -345,6 +345,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, generateException("The VCF specification requires a valid info field"); if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) { + if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 ) + generateException("The VCF specification does not allow for whitespace in the INFO field"); + int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR); for (int i = 0; i < infoValueSplitSize; i++) { String key; From 6592972f82bab1bc0903460241b7b1656902e4cd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Sep 2011 11:25:08 -0400 Subject: [PATCH 37/41] Putative fix for BAQ array out of bounds -- Old code required qual to be <64, which isn't strictly necessary. Now uses the Picard SAMUtils.MAX_PHRED_SCORE constant -- Unittest to enforce this behavior --- .../src/org/broadinstitute/sting/utils/baq/BAQ.java | 2 +- .../broadinstitute/sting/utils/baq/BAQUnitTest.java | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 1723557c4..4f096f86e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -152,7 +152,7 @@ public class BAQ { } } - private double calcEpsilon( byte ref, byte read, byte qualB ) { + protected double calcEpsilon( byte ref, byte read, byte qualB ) { return EPSILONS[ref][read][qualB]; } diff --git a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java index 2e4dac6da..67943ccb4 100644 --- a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java @@ -172,6 +172,17 @@ public class BAQUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testBAQQualRange() { + BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters + final byte ref = (byte)'A'; + final byte alt = (byte)'A'; + + for ( int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++ ) + Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range"); + } + + public void testBAQ(BAQTest test, boolean lookupWithFasta) { BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters From 2585fc3d6cc4dd873eed2868fef8335d7e40a504 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 21 Sep 2011 15:22:26 -0400 Subject: [PATCH 38/41] Updating Rscript path doc text for Broad users --- .../sting/analyzecovariates/AnalyzeCovariates.java | 2 +- .../gatk/walkers/variantrecalibration/VariantRecalibrator.java | 2 +- .../src/org/broadinstitute/sting/utils/R/RScriptExecutor.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 7ea515591..1ef452a5c 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram { private String RECAL_FILE = "output.recal_data.csv"; @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) private String OUTPUT_DIR = "analyzeCovariates/"; - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false) private String PATH_TO_RSCRIPT = "Rscript"; @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) private String PATH_TO_RESOURCES = "public/R/"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 529d17285..89e702b64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker Date: Wed, 21 Sep 2011 15:25:01 -0400 Subject: [PATCH 39/41] Marginally cleaner isVCFStream() function -- cleanup trying to debug minor bug. Failed to fix the bug, but the code is nicer now --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 18646b057..e8d1dc6d6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -601,12 +601,15 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) { try { byte[] buff = new byte[MAGIC_HEADER_LINE.length()]; - stream.read(buff, 0, MAGIC_HEADER_LINE.length()); - String firstLine = new String(buff); - stream.close(); - return firstLine.startsWith(MAGIC_HEADER_LINE); + int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length()); + boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes()); + return eq; +// String firstLine = new String(buff); +// return firstLine.startsWith(MAGIC_HEADER_LINE); } catch ( IOException e ) { return false; + } finally { + try { stream.close(); } catch ( IOException e ) {} } } } From 6bcfce225f42e31a03d721b119410da7c9063f5d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Sep 2011 15:39:19 -0400 Subject: [PATCH 40/41] Fix for dynamic type determination for bgzip files -- GZipInputStream handles bgzip files under linux, but not mac -- Added BlockCompressedInputStream test as well, which works properly on bgzip files --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 6 +++++- .../sting/gatk/refdata/tracks/FeatureManagerUnitTest.java | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index e8d1dc6d6..83c7083d0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec; import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; +import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -590,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) { try { return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) || - isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); + isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) || + isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { @@ -608,6 +610,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, // return firstLine.startsWith(MAGIC_HEADER_LINE); } catch ( IOException e ) { return false; + } catch ( RuntimeException e ) { + return false; } finally { try { stream.close(); } catch ( IOException e ) {} } diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index bae8e99ed..e8799e2ab 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest { private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf"); private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf"); private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz"); + private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz"); private FeatureManager manager; private GenomeLocParser genomeLocParser; @@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest { new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE); new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE); new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ); + new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP); new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null); return FMTest.getTests(FMTest.class); } From c6ba94471985e3956b2108063c9f65d350d170cb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 21 Sep 2011 15:39:45 -0400 Subject: [PATCH 41/41] Adding bgzip vcf file for unit tests --- public/testdata/HiSeq.10000.bgzip.vcf.gz | Bin 0 -> 509234 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 public/testdata/HiSeq.10000.bgzip.vcf.gz diff --git a/public/testdata/HiSeq.10000.bgzip.vcf.gz b/public/testdata/HiSeq.10000.bgzip.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f2b9bf14a3e046fe2b7dd71ffdbf23e6c521bd9 GIT binary patch literal 509234 zcmV(-K-|9{iwFb&00000{{{d;LjnL=LG68OZyZ;W?F+hksZ+dAV2w^ZP&F{=FwJhkg$~Yn@sA=YK9<(IXz1mEHNx zZ=d{lb9H+0=EKIUjJdhQ&)j|r{lr3lWO2Uvd2_xvy;=O*=hJgqlS%uYXX$yCo_fIZ z{P^=Mug@L!JU9M4$Ir76`@hb8;-3d){>!5upPamYaq`zk-#vTs==C?ZpZ5I^W^ud- z*t|df5r28O_#?@(_%F1VfBF-4*8b`46Q5nY-JIWk%!{8lSLdgnt{10^&)1u)2WJYKepPt{)|I?3buJKv;*5%dP&DG-kj6U@3-OqgT zvVErA^O-+xPTww`-~$(byu3L7b@7g`)aEUH?CINUy!E%dk~j3xXBY4PboXPPe%C(6 z%g20setCLxcN^Y6xTXitqt4I%Mlyc5ynHJHK6 &JAXS}+(qIWU;5dP(7&%b>kJOB0Uo!>0p&^fxn-gYlz$uDVh`sTyp(RZ(P z` 6|Mt$u4@j>c{_WM~-#%|H-u(KTKlAyVgZKLKhE&T1|KQ^C zUH@d`sL@gBUc&9iKY839Z< M4EqW#D>>Fdjjo71xkIyZ|~pH46M3wqWDKXpgMzx??5V$K@BJH2|pxxV2? zpS}Nl`T2VBm&=crpDxdT{YY<>o`R46>*>|mDV;0b=Vup-Z_a2ZuNF_vt~R{LJD+~~ z>6~But2dWdn`u$$)30$1=;+|%_>kac{z9L!xcTLB@kiWZRxb3nKP|2|@2@uRPjB!7 zKD5ue^9H`3ach74{?+1p(g=7?|Kj!@V&?DkohL2+_vY$ScJB5=U(8t9YgWx?@6Rsi z!9VNkS39)0##_Dp%paaiKeO#3z3q2rZ|P-k9=4_a^y|<$ef4 Gb3E zx0hE>PH#?MUR}P~Twl{metAm2d;L%ra{c=9`_sQ|9@7s#{NaJC+5ETn*Qf4*eQ>k+ z`00W351&7>uH>@%%h{{VzpeiH>8sUGznre#oNrDqHg8vShELB|^f}AL-)RNmORpe) z=>^m;y@2$k7hu2mD{x=@6nyb}V88eZocrP{_|gmb;`%s$=>^y?uZ_O=0>1brlrOyi zYrgmjzW6(EU;Z6@`K$27?Xvsw@8FADB|F G}2Xzr6Wy^*{f0vHABm=bzthp56NQ@0Sbu@BhLx>1l|{POc@5 zvN?W8Up?&p{rVw7^ONh-PiLEp>yy*#>&?fX&VQvl{IePR*6Q8q_08$x>hkU5vx~RK z|MK>y>x)lI`EPrT^b#J>8?q0dK6?E>PVfRB@(-L?S5H2?r-#t5FFSFT-TIdz7Jc{i zW&6k1SEq0O_WbmtzLLkUpPhW%T)p3%Tz#ep@1LGOd+~hn MPd~{+ z03_-8au)FFw-~-{~_xp8op@XHZX0Z%)oH-+aD4wub%>8LyL@5B#lT+ieTK zW?KF6qwl_xllcDX^7E&YpU!DRPLO4j$F`i<$;ZpLoAcxU{^Hs5*C*e+dj0JCN3XwG z{B(NVzVprZFQ5JR?CVG0Ek11MBQF2r*Y}s#^xBT|s(kYxbPrOw=#3+gKRrJ`LG}$9 z))PFf$xX-b;_d0x+mnxH7xcDT3W6oM{&0CtH|O%|M03Q)-mJ{bU)g*-yFn)C7$u1X zz0&vZ=~mAFlv%&geF*=`#pSE-UOZ93=(2tONSBtrOCsxuklo=I{^|Vm2DK~QRr(Zu z$(qbL;bhLSGyI*8azF>ZNr&O%+4aZMn>Qae*C+I&!Tj{; 0Um5{jFvSx5t=7iHkRMlWr~Or(f?bLmu+`-~HhannFU+?CMk!K)2I7J3nwM zzjLtSluP~P)uv|PmVf>F+vP9suU9|Uhg%!b?8E2xD^gpn`my@?&ATPnnU;??Z^wV@ z3?-}U6&+su?7g*VPciOMl5^M2i|R}A|GH*%Hsnxz3i&sxS9vAh{QIXXcpxMZooD$s z-bVUc)0^K8{N(NLwcpU1;49o0-~RT`eD(R?3?DQ8X)b^+p1(Nx%QugneDmY~zqxa7 zE}ilJOYYwPziq$ewsHOaAGrX(TmUuj=RFvLKQoA6xp?%LzwD4Ae_jOoZT@151NpN6 zL2h5?h5=aYU>O!2CS}o3CV!8+Z+n;jd-`JN?q6SizIxN$@E4zM&OXwwUj6P5U;pLD z7vCSfeDUh&*^{Fmzxnp)(RZ(p@CJ_rw{V0~(2 kG?*(Up@Mk+>B_xdi4C*ef8uet=HDCPymJZVZK70@|ga(RWLkGC9P8Y>iZv# zZCyD-UzlT8%2zLbpre!HDu%CqcyjDk)_?Wt@$my|0)1VMp1xi`da{1fwyVp&GJEwtB{uuit#Js*R`rdA(KR3lT3$66IM3!=HyjQB1`m;CDn}WB3bMzS=pOF9}otGDSk=cU0dhYw1s6A z3&}y7OH#S&RwZo3DTc&(eXuymo5`&SI1QG?2`lGV96B)&r