From 034b8685889a879eda0e7c1a001358a26845755a Mon Sep 17 00:00:00 2001
From: Christopher Hartl
* One or more bam files (with proper headers) to be analyzed for coverage statistics
+ * (Optional) A REFSEQ Rod to aggregate coverage to the gene level
*
- *(Optional) A REFSEQ Rod to aggregate coverage to the gene level
- *
- * (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation)
- *Input
*
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents: @@ -96,7 +93,7 @@ import java.util.*; *
* java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ - * -T DepthOfCoverage \ + * -T VariantEval \ * -o file_name_base \ * -I input_bams.list * [-geneList refSeq.sorted.txt] \ diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index f142fa5aa..d94d9ff84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -12,35 +12,19 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.ArrayList; /** - * Allows for reading in RefSeq information + * TODO FOR CHRIS HARTL * *- * Parses a sorted UCSC RefSeq file (see below) into relevant features: the gene name, the unique gene name (if multiple transcrips get separate entries), exons, gene start/stop, coding start/stop, - * strandedness of transcription. + * Codec Description *
* *- * Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here - * http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq + * See also: link to file specification *
- *Usage
- * The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example - *- * -refSeqBinding:REFSEQ /path/to/refSeq.txt - *- * - * You will need to consult individual walkers for the binding name ("refSeqBinding", above) * *File format example
- * If you want to define your own file for use, the format is (tab delimited): - * bin, name, chrom, strand, transcription start, transcription end, coding start, coding end, num exons, exon starts, exon ends, id, alt. name, coding start status (complete/incomplete), coding end status (complete,incomplete) - * and exon frames, for example: - *- * 76 NM_001011874 1 - 3204562 3661579 3206102 3661429 3 3204562,3411782,3660632, 3207049,3411982,3661579, 0 Xkr4 cmpl cmpl 1,2,0, - *- * for more information see here *- * + * A BAM file containing exactly one sample. *
* * @author Mark DePristo From 85626e7a5dbae8a263b8e2ff2e64bd25656d6e9c Mon Sep 17 00:00:00 2001 From: Eric BanksDate: Mon, 19 Sep 2011 12:24:05 -0400 Subject: [PATCH 2/8] We no longer want people to use the August 2010 Dindel calls for indel realignment but instead Guillermo's new whole genome bi-allelic indel calls; updating the bundle accordingly. Also, there was some confusion by the 1000G data processing folks as to exactly what these indel files are, so I've renamed them so that it's clear. Wiki updated too. --- .../sting/queue/qscripts/GATKResourcesBundle.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 59c00b8cd..e8b8258c1 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -131,11 +131,11 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf", "hapmap_3.3", b37, true, true)) - addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf", - "1000G_indels_for_realignment", b37, true, false)) + addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf", + "1000G_biallelic.indels", b37, true, false)) addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf", - "indels_mills_devine", b37, true, true)) + "Mills_Devine_2hit.indels", b37, true, true)) // // example call set for wiki tutorial From 8143def292a49844ab3ff302fbb00f5c866299f7 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Mon, 19 Sep 2011 10:29:06 -0400 Subject: [PATCH 3/8] Fix the -T argument in the DepthOfCoverage docs Add documentation for the RefSeqCodec, pointing users to the wiki page describing how to create the file --- .../coverage/DepthOfCoverageWalker.java | 9 ++++--- .../utils/codecs/refseq/RefSeqCodec.java | 24 +++++++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 86f97a36c..664c319ab 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -63,9 +63,12 @@ import java.util.*; * Input
** One or more bam files (with proper headers) to be analyzed for coverage statistics - * (Optional) A REFSEQ Rod to aggregate coverage to the gene level *
- * + *+ *(Optional) A REFSEQ Rod to aggregate coverage to the gene level + *
+ * (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation) + *
*Output
** Tables pertaining to different coverage summaries. Suffix on the table files declares the contents: @@ -93,7 +96,7 @@ import java.util.*; *
* java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ - * -T VariantEval \ + * -T DepthOfCoverage \ * -o file_name_base \ * -I input_bams.list * [-geneList refSeq.sorted.txt] \ diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index d94d9ff84..f142fa5aa 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -12,19 +12,35 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.ArrayList; /** - * TODO FOR CHRIS HARTL + * Allows for reading in RefSeq information * *- * Codec Description + * Parses a sorted UCSC RefSeq file (see below) into relevant features: the gene name, the unique gene name (if multiple transcrips get separate entries), exons, gene start/stop, coding start/stop, + * strandedness of transcription. *
* *- * See also: link to file specification + * Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here + * http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq *
+ *Usage
+ * The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example + *+ * -refSeqBinding:REFSEQ /path/to/refSeq.txt + *+ * + * You will need to consult individual walkers for the binding name ("refSeqBinding", above) * *File format example
+ * If you want to define your own file for use, the format is (tab delimited): + * bin, name, chrom, strand, transcription start, transcription end, coding start, coding end, num exons, exon starts, exon ends, id, alt. name, coding start status (complete/incomplete), coding end status (complete,incomplete) + * and exon frames, for example: + *+ * 76 NM_001011874 1 - 3204562 3661579 3206102 3661429 3 3204562,3411782,3660632, 3207049,3411982,3661579, 0 Xkr4 cmpl cmpl 1,2,0, + *+ * for more information see here *- * A BAM file containing exactly one sample. + * *
* * @author Mark DePristo From 5e832254a4e024378f7fdee252abf7df9e289c6a Mon Sep 17 00:00:00 2001 From: Mauricio CarneiroDate: Mon, 19 Sep 2011 13:28:41 -0400 Subject: [PATCH 4/8] Fixing ReadAndInterval overlap comments. --- .../sting/utils/sam/ReadUtils.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 62bbb0307..18fcdabf2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -118,31 +118,40 @@ public class ReadUtils { /** * This enum represents all the different ways in which a read can overlap an interval. * - * NO_OVERLAP: + * NO_OVERLAP_CONTIG: + * read and interval are in different contigs. + * + * NO_OVERLAP_LEFT: + * the read does not overlap the interval. + * + * |----------------| (interval) + * <----------------> (read) + * + * NO_OVERLAP_RIGHT: * the read does not overlap the interval. * * |----------------| (interval) * <----------------> (read) * - * LEFT_OVERLAP: + * OVERLAP_LEFT: * the read starts before the beginning of the interval but ends inside of it * * |----------------| (interval) * <----------------> (read) * - * RIGHT_OVERLAP: + * OVERLAP_RIGHT: * the read starts inside the interval but ends outside of it * * |----------------| (interval) * <----------------> (read) * - * FULL_OVERLAP: + * OVERLAP_LEFT_AND_RIGHT: * the read starts before the interval and ends after the interval * * |-----------| (interval) * <-------------------> (read) * - * CONTAINED: + * OVERLAP_CONTAINED: * the read starts and ends inside the interval * * |----------------| (interval) From ba150570f3d7747256f634a2828ab673a98953f7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 19 Sep 2011 13:30:32 -0400 Subject: [PATCH 5/8] Updating to use new rod system syntax plus name change for CountRODs --- .../sting/queue/qscripts/GATKResourcesBundle.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index e8b8258c1..036a77b58 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -300,9 +300,9 @@ class GATKResourcesBundle extends QScript { bamFile = bamIn } - class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRod with UNIVERSAL_GATK_ARGS { + class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRODs with UNIVERSAL_GATK_ARGS { //@Output val vcfIndex: File = swapExt(vcf.getParent, vcf, ".vcf", ".vcf.idx") - this.rodBind :+= RodBind(vcf.getName, "VCF", vcf) + this.rod :+= vcf this.reference_sequence = ref } @@ -313,7 +313,7 @@ class GATKResourcesBundle extends QScript { } class MakeDBSNP129(@Input dbsnp: File, @Input ref: File, @Output dbsnp129: File) extends SelectVariants with UNIVERSAL_GATK_ARGS { - this.rodBind :+= RodBind("variant", "VCF", dbsnp) + this.variant = dbsnp this.select ++= List("\"dbSNPBuildID <= 129\"") this.reference_sequence = ref this.out = dbsnp129 From 080c9575470c505e10f7b09d59fa22fcb668867d Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Sep 2011 13:53:08 -0400 Subject: [PATCH 6/8] Fixing contracts for SoftUnclippedEnd utils Now accepts reads that are entirely contained inside an insertion. --- .../broadinstitute/sting/utils/sam/ReadUtils.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 18fcdabf2..2de17db14 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -667,7 +667,7 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } - @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) + @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedStart(SAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { @@ -679,7 +679,7 @@ public class ReadUtils { return start; } - @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"}) + @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { int stop = read.getUnclippedStart(); int shift = 0; @@ -695,6 +695,14 @@ public class ReadUtils { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + private static boolean readIsEntirelyInsertion(SAMRecord read) { + for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + if (cigarElement.getOperator() != CigarOperator.INSERTION) + return false; + } + return true; + } + /** * Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before * the alignment start of the read. From 56106d54ed620965aea0b39052de43c81671c817 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Sep 2011 14:00:00 -0400 Subject: [PATCH 7/8] Changing ReadUtils behavior to comply with GenomeLocParser Now the functions getRefCoordSoftUnclippedStart and getRefCoordSoftUnclippedEnd will return getUnclippedStart if the read is all contained within an insertion. Updated the contracts accordingly. This should give the same behavior as the GenomeLocParser now. --- .../src/org/broadinstitute/sting/utils/sam/ReadUtils.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 2de17db14..5d3ef3086 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -667,7 +667,7 @@ public class ReadUtils { return ReadAndIntervalOverlap.OVERLAP_RIGHT; } - @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedStart(SAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { @@ -679,9 +679,13 @@ public class ReadUtils { return start; } - @Ensures({"(result >= read.getUnclippedStart() && result <= read.getUnclippedEnd()) || readIsEntirelyInsertion(read)"}) + @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { int stop = read.getUnclippedStart(); + + if (readIsEntirelyInsertion(read)) + return stop; + int shift = 0; CigarOperator lastOperator = null; for (CigarElement cigarElement : read.getCigar().getCigarElements()) { From 61b89e236ab13b073a3572e983b6c730efd5331e Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Tue, 20 Sep 2011 00:14:35 -0400 Subject: [PATCH 8/8] To work around potential problem with invalid javax.mail 1.4.1 in ivy cache, added explicit javax.mail 1.4.4 along with build.xml code to remove 1.4.1. --- build.xml | 8 ++++++++ ivy.xml | 6 ++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/build.xml b/build.xml index e5ad9daf0..1f26e7b7a 100644 --- a/build.xml +++ b/build.xml @@ -163,6 +163,14 @@ + + + + diff --git a/ivy.xml b/ivy.xml index 115f4062a..f90b9a010 100644 --- a/ivy.xml +++ b/ivy.xml @@ -15,10 +15,8 @@ - - - +- +