From 28d782b4c7ba29e9a04bcef84e878ff9b3190864 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 2 Sep 2011 13:38:35 -0400 Subject: [PATCH 01/26] Allowing multiple dnsnp and indel files in the DPP --- .../queue/qscripts/DataProcessingPipeline.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index b0c23e6b1..2a135496d 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -11,7 +11,7 @@ import net.sf.samtools.SAMFileReader import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils -import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction} +import org.broadinstitute.sting.queue.function.ListWriterFunction class DataProcessingPipeline extends QScript { qscript => @@ -31,7 +31,7 @@ class DataProcessingPipeline extends QScript { var reference: File = _ @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true) - var dbSNP: File = _ + var dbSNP: List[File] = List() /**************************************************************************** * Optional Parameters @@ -43,7 +43,7 @@ class DataProcessingPipeline extends QScript { // @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) - var indels: File = _ + var indels: List[File] = List() @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ @@ -321,9 +321,9 @@ class DataProcessingPipeline extends QScript { this.input_file = inBams this.out = outIntervals this.mismatchFraction = 0.0 - this.known :+= qscript.dbSNP + this.known ++= qscript.dbSNP if (indels != null) - this.known :+= qscript.indels + this.known ++= qscript.indels this.scatterCount = nContigs this.analysisName = queueLogDir + outIntervals + ".target" this.jobName = queueLogDir + outIntervals + ".target" @@ -333,9 +333,9 @@ class DataProcessingPipeline extends QScript { this.input_file = inBams this.targetIntervals = tIntervals this.out = outBam - this.known :+= qscript.dbSNP + this.known ++= qscript.dbSNP if (qscript.indels != null) - this.known :+= qscript.indels + this.known ++= qscript.indels this.consensusDeterminationModel = cleanModelEnum this.compress = 0 this.scatterCount = nContigs @@ -344,7 +344,7 @@ class DataProcessingPipeline extends QScript { } case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { - this.knownSites :+= qscript.dbSNP + this.knownSites ++= qscript.dbSNP this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.input_file :+= inBam this.recal_file = outRecalFile From 0adb388dee3abd3272dd831c76ac4fdb69769632 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Tue, 6 Sep 2011 12:41:46 -0400 Subject: [PATCH 02/26] Fixed bug in SelectVariants that was annotating sample_file / exclude_sample_file as @Argument instead of @Input meaning they weren't tracked in Queue. Updates for HybridSelectionPipeline: - Use VQSR on SNPs for projects using bait set whole_exome_agilent_1 and applying cut at 98.5. - If a whole_exome_agilent_1 project has less than 50 samples also mixing in 1000G samples to reach VQSR thresholds. - Updated SNP hard filters based on analysis done with ebanks to approximate VQSR results on small target batches. - Removed GSA_PRODUCTION_ONLY flag from indel caller. - Updated indel hard filters based on delangel's analysis. - Updated HybridSelectionPipelineTest to use HARD SNP filters only, for now. --- .../sting/gatk/walkers/variantutils/SelectVariants.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index bb3cd82a1..35ff66243 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker { @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false) public Set sampleExpressions ; - @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) + @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) public Set sampleFiles; /** @@ -226,7 +226,7 @@ public class SelectVariants extends RodWalker { /** * Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded. */ - @Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) + @Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) public Set XLsampleFiles = new HashSet(0); /** From 47607a7effc2be91bc44ccc453639c544a1e45bb Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Tue, 6 Sep 2011 14:25:57 -0400 Subject: [PATCH 04/26] Fixed bug where deletions messed up interval clipping - Instead of using readLength, the ReadUtil function are used to get a proper read coordinate - Added debug info in interval clipping ( with -dl) NOTE: method might not be safe for production and checks need to be added to the ClippingOp code --- .../org/broadinstitute/sting/utils/clipreads/ReadClipper.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index be045309a..26c25850a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -63,6 +63,10 @@ public class ReadClipper { if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); + // TODO add requires statement/check in the Hardclip function + if ( start > stop ) + stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); + //System.out.println("Clipping start/stop: " + start + "/" + stop); this.addOp(new ClippingOp(start, stop)); SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); From 284f83469b9ad7942c369d5fbd41fc36fc34ea0c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 6 Sep 2011 15:09:37 -0400 Subject: [PATCH 06/26] ReducedRead flag cached in GATKSAMRecord. 20% performance improvement --- .../sting/utils/pileup/PileupElement.java | 2 +- .../sting/utils/sam/GATKSAMRecord.java | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 66e1afecb..12899e898 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -82,7 +82,7 @@ public class PileupElement { // -------------------------------------------------------------------------- private Integer getReducedReadQualityTagValue() { - return (Integer)getRead().getAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); + return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); } public boolean isReducedRead() { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 517f9f75d..c55a462f1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -41,6 +41,10 @@ public class GATKSAMRecord extends SAMRecord { // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; + /** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */ + private boolean lookedUpReducedReadQuality = false; + private Integer reducedReadQuality; + // These temporary attributes were added here to make life easier for // certain algorithms by providing a way to label or attach arbitrary data to // individual GATKSAMRecords. @@ -338,7 +342,17 @@ public class GATKSAMRecord extends SAMRecord { public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); } - public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); } + public Integer getIntegerAttribute(final String tag) { + if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) { + if ( ! lookedUpReducedReadQuality ) { + lookedUpReducedReadQuality = true; + reducedReadQuality = mRecord.getIntegerAttribute(tag); + } + return reducedReadQuality; + } else { + return mRecord.getIntegerAttribute(tag); + } + } public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); } From 3db7ecb92031496bd6474c2e0015791420284bc8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 6 Sep 2011 15:09:37 -0400 Subject: [PATCH 08/26] ReducedRead flag cached in GATKSAMRecord. 20% performance improvement --- .../sting/utils/pileup/PileupElement.java | 2 +- .../sting/utils/sam/GATKSAMRecord.java | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 66e1afecb..12899e898 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -82,7 +82,7 @@ public class PileupElement { // -------------------------------------------------------------------------- private Integer getReducedReadQualityTagValue() { - return (Integer)getRead().getAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); + return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); } public boolean isReducedRead() { diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 517f9f75d..c55a462f1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -41,6 +41,10 @@ public class GATKSAMRecord extends SAMRecord { // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; + /** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */ + private boolean lookedUpReducedReadQuality = false; + private Integer reducedReadQuality; + // These temporary attributes were added here to make life easier for // certain algorithms by providing a way to label or attach arbitrary data to // individual GATKSAMRecords. @@ -338,7 +342,17 @@ public class GATKSAMRecord extends SAMRecord { public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); } - public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); } + public Integer getIntegerAttribute(final String tag) { + if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) { + if ( ! lookedUpReducedReadQuality ) { + lookedUpReducedReadQuality = true; + reducedReadQuality = mRecord.getIntegerAttribute(tag); + } + return reducedReadQuality; + } else { + return mRecord.getIntegerAttribute(tag); + } + } public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); } From 9559115ad5eeff677798c21187bdd7e07700c319 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 6 Sep 2011 16:54:01 -0400 Subject: [PATCH 09/26] Bugfix for singleton runs. Now with histograms where possible --- public/R/queueJobReport.R | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/public/R/queueJobReport.R b/public/R/queueJobReport.R index 18d33054e..a24d269c9 100644 --- a/public/R/queueJobReport.R +++ b/public/R/queueJobReport.R @@ -12,7 +12,9 @@ if ( onCMDLine ) { inputFileName = args[1] outputPDF = args[2] } else { - inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + #inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" + inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" + #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" outputPDF = NA } @@ -113,11 +115,22 @@ plotGroup <- function(groupTable) { textplot(as.data.frame(sum), show.rownames=F) title(paste("Job summary for", name, "itemizing each iteration"), cex=3) + # histogram of job times by groupAnnotations + if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) { + # todo -- how do we group by annotations? + p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram() + p <- p + xlab("runtime in seconds") + ylab("No. of jobs") + p <- p + opts(title=paste("Job runtime histogram for", name)) + print(p) + } + # as above, but averaging over all iterations groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") - sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) - textplot(as.data.frame(sum), show.rownames=F) - title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + if ( dim(sub)[1] > 1 ) { + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + } } # print out some useful basic information @@ -146,7 +159,7 @@ plotJobsGantt(gatkReportData, T) plotJobsGantt(gatkReportData, F) plotProgressByTime(gatkReportData) for ( group in gatkReportData ) { - plotGroup(group) + plotGroup(group) } if ( ! is.na(outputPDF) ) { From da9c8ab38601c2fd893307869c46221cd37d68ba Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 6 Sep 2011 20:39:42 -0400 Subject: [PATCH 10/26] Revving the Tribble jar where the DbsnpCodec class was renamed to OldDbsnpCodec. Updating GATK code accordingly. --- .../gatk/refdata/VariantContextAdaptors.java | 24 +++++++++--------- .../variantutils/ValidateVariants.java | 11 ++------ .../VariantsToVCFIntegrationTest.java | 2 +- .../{tribble-23.jar => tribble-24.jar} | Bin 299168 -> 299210 bytes .../{tribble-23.xml => tribble-24.xml} | 2 +- 5 files changed, 16 insertions(+), 23 deletions(-) rename settings/repository/org.broad/{tribble-23.jar => tribble-24.jar} (93%) rename settings/repository/org.broad/{tribble-23.xml => tribble-24.xml} (51%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index bf490e28c..7bf518fd5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.util.SequenceUtil; import org.broad.tribble.Feature; import org.broad.tribble.annotation.Strand; -import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.dbsnp.OldDbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -93,27 +93,27 @@ public class VariantContextAdaptors { // -------------------------------------------------------------------------------------------------------------- private static class DBSnpAdaptor implements VCAdaptor { - private static boolean isSNP(DbSNPFeature feature) { + private static boolean isSNP(OldDbSNPFeature feature) { return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact"); } - private static boolean isMNP(DbSNPFeature feature) { + private static boolean isMNP(OldDbSNPFeature feature) { return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range"); } - private static boolean isInsertion(DbSNPFeature feature) { + private static boolean isInsertion(OldDbSNPFeature feature) { return feature.getVariantType().contains("insertion"); } - private static boolean isDeletion(DbSNPFeature feature) { + private static boolean isDeletion(OldDbSNPFeature feature) { return feature.getVariantType().contains("deletion"); } - private static boolean isIndel(DbSNPFeature feature) { + private static boolean isIndel(OldDbSNPFeature feature) { return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature); } - public static boolean isComplexIndel(DbSNPFeature feature) { + public static boolean isComplexIndel(OldDbSNPFeature feature) { return feature.getVariantType().contains("in-del"); } @@ -125,7 +125,7 @@ public class VariantContextAdaptors { * * @return an alternate allele list */ - public static List getAlternateAlleleList(DbSNPFeature feature) { + public static List getAlternateAlleleList(OldDbSNPFeature feature) { List ret = new ArrayList(); for (String allele : getAlleleList(feature)) if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele); @@ -139,7 +139,7 @@ public class VariantContextAdaptors { * * @return an alternate allele list */ - public static List getAlleleList(DbSNPFeature feature) { + public static List getAlleleList(OldDbSNPFeature feature) { List alleleList = new ArrayList(); // add ref first if ( feature.getStrand() == Strand.POSITIVE ) @@ -156,14 +156,14 @@ public class VariantContextAdaptors { /** * Converts non-VCF formatted dbSNP records to VariantContext. - * @return DbSNPFeature. + * @return OldDbSNPFeature. */ @Override - public Class getAdaptableFeatureType() { return DbSNPFeature.class; } + public Class getAdaptableFeatureType() { return OldDbSNPFeature.class; } @Override public VariantContext convert(String name, Object input, ReferenceContext ref) { - DbSNPFeature dbsnp = (DbSNPFeature)input; + OldDbSNPFeature dbsnp = (OldDbSNPFeature)input; if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) return null; Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index c0f695966..2c7902914 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.TribbleException; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; @@ -41,7 +40,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.Collection; import java.util.HashSet; -import java.util.List; import java.util.Set; @@ -168,14 +166,9 @@ public class ValidateVariants extends RodWalker { // get the RS IDs Set rsIDs = null; if ( tracker.hasValues(dbsnp.dbsnp) ) { - List dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus()); rsIDs = new HashSet(); - for ( Object d : dbsnpList ) { - if (d instanceof DbSNPFeature ) - rsIDs.add(((DbSNPFeature)d).getRsID()); - else if (d instanceof VariantContext ) - rsIDs.add(((VariantContext)d).getID()); - } + for ( VariantContext rsID : tracker.getValues(dbsnp.dbsnp, ref.getLocus()) ) + rsIDs.add(rsID.getID()); } try { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index e740acf05..95fafac8d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -23,7 +23,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --variant:dbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + + " --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + " -T VariantsToVCF" + " -L 1:1-30,000,000" + " -o %s" + diff --git a/settings/repository/org.broad/tribble-23.jar b/settings/repository/org.broad/tribble-24.jar similarity index 93% rename from settings/repository/org.broad/tribble-23.jar rename to settings/repository/org.broad/tribble-24.jar index 40e2d4c5ec85a3fd0c7f14b55a1d0ba3c283c217..b1c39e60a14ce895e515cadb07c356d6d428bbc4 100644 GIT binary patch delta 5617 zcmZu#30T!t7XL37cH(lDftgvI7Rfh zVEmV5zKAQSns6DJJ+{t#<+$DNeC3c+Jr|ziJWjHwW+L~+#n$YU1z{&dCCcuQm`ny6 z@Alot#kadZ;o|RmZssc?`ZCTO_l@98m!HfTi-1%sj4V536uRFV@}>r%c0^{=tP>JLjML`2*dmjjT49gOA-wTt81T?I|@R% zBNR>*-OK`EwNFIY87rDi8{^*HDiIfE`_9&CZo4O#x?)e`!EFOEeTE-*jlfcste&~8 zqpu)z7`=8+*N72@=zaYRvHLVc&qAo#S1PK53O%5~G#R6hmh-f?Hr?jTk>-yum7p6h z`5Fo?+4Sl}x(#Ff#bvyxZdVo}V_4f3in%}3c7YqM=(uN%CCu-936ak`^Q1m32VeZR zxMTBBL0~k*kTkciq4BY^G4zCr6P?cdX2pRTIM5tKo*Z*CZ3>|qCJ-Dd(bC$=o~EMvQUYEZ zrdz*Bmt|Bw{z$rMfhu3LUFvEN5QO#sXw^w>wLej{9mO>6w~t5~)17BhM*!1+%P+_; zqN-hhe2c5z`LJnTn71H=d2?_2VX_w-Uq^MP`Vft2T(BJBjXp|_U8yOzpvT(++k?uv zz=7$qqv=eXyp8WGHC}G!i3m-UU3s`M3uP+@^f@y}o`pxyba1u&9q*B<5_v10bW>op zynwg&Z|`^-D`>$hAWBaLz<~(S8dCZbN7IEZa;1o|9BW-GCJYjUgh5ajPWr>QTV*?# zw}Dui{t9y1Al80heN4WJe%p`9Ej+%1P4ab1Tzl_~{41|O)m1s1bFH`K(>(gEo$}H? zDCb1TE}rZ|R^(e=`1Q`@Gc2ts%$5AXs~6@*9`gMT?@zio8mcE%h|&wh7aFh2*1dX^ zHM{6T9&x3oex%kF9c~FH=dF-Sn?PE)>(P_QNgm>}H%S8T<-}QJG>W}Hi@b(=f;))> zA0O~fB9WMav?LOQ%(f&lf=RE!SC*uanYfFVG!l&~ebSYf7o^h&*#=GNWEL_m8DusxIT>UI z3ZKbPwtP36ctB((8H`t@>dAyvndAlbtU-My@zm2k>h-MAXI1F=*;(jT3(=WGXMVci zd^T}MS7&pHhjecjgk%xVz}U&6{EG~6g_y-e1NMs*pPq{;bf^b<3H|NKPX0+t=OsoXRsMpYeleMd{(fPqwQYum zzAlES8`gi?z2&x|4@&I>fzdFPk=AY{SIEs)VyvR2l6H15YtWcY_$|ijCvE98L_gI1 z$-KWqpPT@RK6;cwRpy(SfR0 zsr#z}oR$)Nr6Fc1_3_ng?jzA?0p&{-?{y5>lYj`sri*r9m8)#d>A}I=C-fm<6sdAQ zE{V(61&Q7eyG#k!JXg@!Vz^3v8x$@h<1k~Y$wDRMp2Ne_e`5zW%a!oFmecUM${=n9 z@r8utN;0$9YD76(9RdftWt-XR*c!e{Cl4*y=Mp8AKAe4gJ52RXbE>#C=Q=6Xw7*J#oQGgV8%+~f?Am?DK!0ZFX*e4O7LwAD!grm zXwg@77kw3lVjeQc7lu3%9if^*>8Sj!>f zC3rAcbfFz2Sr2{w2I?891YwLSq>NsA-Y}-S$?!gNoCwu<|K~pQ-F+pfn$G=<)Ys6< zTmxeAh&Pt8efy4x|4@gF=(uA3*;zOcs`B`Y2F-bN1GD2LL$ux*vMVG_V6O|5CBt># zKBp*WL0}YNwn4w+3O6fS1sf$lNL!;6B!3MxI4px3Jhw=4hqE;_xo6kV=3oW;k>PXJ zn;nkrQJqI?DZgK1hWwsWWwn;$>N)PTNU{e_KC#1+mG+CgQbSJ?qj9PzE4$^FOZ{Q= zPRRk^ zz3{9i$qRny4l-oH&5!l;s$>+b@;9=@tmFpzg5JHo3*rjMFfUe$v#$hS_y@gS7)@2_ z-(*5Q&{#mmVv?T~&`6vwp;0=YYHQC)Ha&4B7P5WZ;x6lcm2|MTdkc*WTKRLgqBs1V z-fx(!ornV|vDR;29fK7%ej;?TLrk{&+asCem_ zt)EZP;b+}J;k2#AjPy!^(9isblW;fs9-J#Cei+Gri|Os*8vy5vi919E$^kGhK(>U0 zbxKEOt)sY2uo>5}iCn8^8Sfdx1!1h&K5PDMab|f(oXo+$MA=?g3AML2p5xW=baG!! zJAhYk?8rx7(g&T9`Yw148cN6{jQ07`_~LCK%z2D<-TV2}=4 zw#v5bV}nl+y00R>ewUhH_Nlb?BvD50@38mf~2qetEm97+}W zQM#hi&6e~(3Y1<_$+t8?eJSyVsiR~ESWM-yp0^i{)P78FVn*uu>HyT0(&?Ywc-Kqm zes*7F`T=nIDoqXR2aL~q_MW@>^q+E!f>i$6Z!>wH+FxY)*p(@rx}%I*YUt#V{8&ct zV;Y{aReu5xDrtY8`y0LCqRN%{;=dmoww5*kBXzx6VM#fO#!cw#uDzZ{uOvq5O>Bqr z<;o`dY@|-6xpF6s1BesE(=&_v%S6b{4B0`p8?l4K8D-_iM6%=|+ z&!N~n$r|Q%Lpa)A^JY+g_@qLyb%ZIn`LI!($bepu1_ zHWP}qw9y% UATL-~O|&*f+C5?teHVoP0cy%BZ~y=R delta 5613 zcmZu#2Ut|s629fIEM=*JG}%RB0c=>1D1t`pQ9-OA0#*d0h*7}?b_FSVmGNm3H8DPn zis&ZU7<)4 zH%umuf}j_KZx$Oj8rsXX^e1wg+~fVgjkQMn`LzxEQH)|o_G=y~W=lA5PW}@I&Jhz1 zn19kO6LBP6-$zEJ+{u%VoN)e?k9^{`iyKe$>@T@fzL<*mxXrs|L15BLNlW%pPI2W&)dpeu?tPN$K0~QBQb$};o}`q(M9dyGzX(+ zqKVmm;1zXxFABUs>say7_cTnhwJ(P8wXd!c`Df zxI%cgh;hvEWlf^8e&LIZ5AVgA4{wc^M0dr0d#ROD4YzY;fa3j2fVqCV9$yav21>y& zXExFMG`9$=iNEWD{oi>B!gpRu)-jWL(w=U5vA2>|K_4r0RP4+jRje0nNhVh6u_oqe z#|rH++f7PT&<=Y+*kO-FWieWryPez1XXW zkMcnarJ~l?>{Q!{_dwLusi?sHAQG&UR5~a%*ZGXyxc(K-<4jIa)i;JC5Ynv_p$A@X)Tqs82(6B=BDOSlcc(ZsKyU^nNuGj8@j$xmN09KyB}4=`cXkQN%z9W$0qDyjns)3-c&33^X#L) zm->r1b?M!_JvY#GyulIjn?xVz+aUQ`PW>S5#=f%5sF&Wst;C{ll&;8_*r~M9k4x+X zfk`ALMxFfNRFkB$wE11y!GrzlnG_zziUv&~a#yTgmr(h84z}zf7YE^3dAxjwk8Kzy zpXW25m@0eop!256_AY3*G+R!@Wn{UQD}Tdh*}Pib#?y=|m8Wo)kGIHQ@`hUr@_TJC zfsV)JYe?dFLayd!*UrjyIvhJ*B|qjxF<+DWxMER)lFENsM$e69dcKb9s$>_~S3q3h zk1E;05_M0`Bf z$wSB)SDdMO#{nxn=(=Rm6_!tx{h)6sArN;%cDJNVAgy_XRf*(PWIL5ehU-|jf?FDi zWalLeOCvE1gQ7Ij8I_Z11Q!GRnnwEINYBY+94Z?plcA{GnoLHb(q#%6h05wFBo39U zQ%F229j267j{Im<5~h)9IC69vNkhdrous2uFr7?Ct5Ptl?jx5sBL@OB6e3l79sL=ekd%3PmIpl@s83esS z0@1n|7X&}sgPZV6zl-mKIGY5(;x~vhnl_(Xg3!TcO0ONu^?T4%nY70vEN+-6wUFsj zr;W|hxmd7eVJtnYm}tGam(3=<@in7o6MyN=nMYBmDj&5h873Vp1YK=PLjc6hC3uWN@?6UEPd4bMXQG28+d6QBtoX(;89-=qgutJX*)Hv7X>)Yw=Qc5xng1|)EL@mL6fx4$mTEL?0Vy>N` z?mf{C4lYpr$L1V;^28_zOuA`8c|zR+@|uxeFJe00xg;#4=Nps$8l&;hZy_0FVn&}1 zJDl*PyC5*>r!nesg-5QxMCn|#Sh$eS<17eF5;UffYq{yErF1Kuw3M_%`=l%F^CwVS zCea%EoXs|S{DuQI@nT1CTShwJ`tC;qiy7>>HI(j_+@NF`F`4MfkGZZ3-=9QR2a_He z(v$;?GzjXc6E3Ej?jPLzkRAX`Mrlk95S~ZU&`5I=)fk1tp*%7iTUN}cQ{Y*x^8Z_M zg1l-vMRED+6k*>kui-DnuBq_0723~SPpDApa^eM@E=nexUEH4i&VF*_f{P$9>8^?Y z@G|G4PVF#@g5{LT@I4=`Y>+&mcDdTr_ZXD$gAMWkdd+o#4C_`Jzk=cs^(vvg>;q%z zn!^MZTx@)E4ZYkkiO?i)<%w$lKiZRaE7eebD=F2X-#C=-A^Sq670PE&*)tB&GPuJR zR_GRkszRXeD&pJWr4CC-CyD+gzj)!O*dsGVH(Dyam+UmDw1Re_+Yh4dvZ0U z4N1k9D4Q&|f*q-{4oqv+eg2mBm>!?ewq(*yL-*QLtB)hhSfjEnSwq=g&qf3u&ZkVD zSfOeL?ak#-{!+PR^Q(1UOQ993ZIH84de9k`(l^HMVM} zO{@KrH38-#dV^-tR%5?{g2G?znO_Mhy8o3rYSS&sGxl_h zNd<#8RH@0_mffMz>g8C#YIjW~EFdB1FpG_z`Pt^+z`lm?!x_F{P*H;#=85cqTtV!l zZvG~Exp!px5z=oJ@d26_U4n&b%6Wy9bf3KyE0?{X%8K2ln6$_f+=}SE!MK6=Vq(2F z(9zD`eDpye(cvP*X`b}d2>a+&hKcq{(bV>s@ z#uz6Kc+ue40OAZy>xnbcei3(m&@%eZI3`-!4ntn_s;{H8TT4{h10|I9(fDQq{{0E` r`%?$cO4Q@QP)e;55>=~`QeqE7OI2(F!%AMj!W|FL8NHcG3nu&@8D?cG diff --git a/settings/repository/org.broad/tribble-23.xml b/settings/repository/org.broad/tribble-24.xml similarity index 51% rename from settings/repository/org.broad/tribble-23.xml rename to settings/repository/org.broad/tribble-24.xml index 2f6a16a03..9b2b967f8 100644 --- a/settings/repository/org.broad/tribble-23.xml +++ b/settings/repository/org.broad/tribble-24.xml @@ -1,3 +1,3 @@ - + From 1ef8a1750a4821cdafd109b63dd83e62b873b68e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 6 Sep 2011 21:07:49 -0400 Subject: [PATCH 11/26] I asked nicely and got nothing. Then I threatened and still got nothing. So I am carrying through on my threats. Guillermo, you have a short reprieve because you were away on vacation, but let's get yours done tomorrow afternoon. --- .../sting/gatk/walkers/coverage/DepthOfCoverageWalker.java | 3 +++ .../gatk/walkers/indels/SomaticIndelDetectorWalker.java | 5 +++++ .../sting/gatk/walkers/validation/ValidationAmplicons.java | 3 +++ 3 files changed, 11 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 7fe16c9df..4537f06f8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -60,6 +60,9 @@ import java.util.*; * and/or percentage of bases covered to or beyond a threshold. * Additionally, reads and bases can be filtered by mapping or base quality score. * + * If any of the command-line arguments for this tool are not clear to you, + * please email chartl at broadinstitute dot org and he will gladly explain them in more detail. + * *

Input

*

* One or more bam files (with proper headers) to be analyzed for coverage statistics diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 546bbe1a6..e5ad3106d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -77,6 +77,11 @@ import java.util.*; * if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords. + * + * If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you, + * please email asivache at broadinstitute dot org and he will gladly explain everything in more detail. + * + * */ @ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class}) public class SomaticIndelDetectorWalker extends ReadWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index f9bd019ea..178b4c177 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -39,6 +39,9 @@ import java.util.List; * reasons why the site may fail validation (nearby variation, for example). *

* + * If any of the command-line arguments for this tool are not clear to you, + * please email chartl at broadinstitute dot org and he will gladly explain them in more detail. + * *

Input

*

* Requires a VCF containing alleles to design amplicons towards, a VCF of variants to mask out of the amplicons, and an From 436f6eb52ba9a279a6c3af63749bfe92ad4e925c Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 7 Sep 2011 08:53:30 -0400 Subject: [PATCH 12/26] Reverting Eric's change and pushing in some command-line-option documentation. --- .../coverage/DepthOfCoverageWalker.java | 45 +++++++++++++++++-- .../validation/ValidationAmplicons.java | 19 +++++--- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 4537f06f8..3a18fe610 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -60,9 +60,6 @@ import java.util.*; * and/or percentage of bases covered to or beyond a threshold. * Additionally, reads and bases can be filtered by mapping or base quality score. * - * If any of the command-line arguments for this tool are not clear to you, - * please email chartl at broadinstitute dot org and he will gladly explain them in more detail. - * *

Input

*

* One or more bam files (with proper headers) to be analyzed for coverage statistics @@ -108,10 +105,19 @@ public class DepthOfCoverageWalker extends LocusWalker out; + /** + * Sets the low-coverage cutoff for granular binning. All loci with depth < START are counted in the first bin. + */ @Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false) int start = 1; + /** + * Sets the high-coverage cutoff for granular binning. All loci with depth > END are counted in the last bin. + */ @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) int stop = 500; + /** + * Sets the number of bins for granular binning + */ @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) int nBins = 499; @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false) @@ -122,28 +128,59 @@ public class DepthOfCoverageWalker extends LocusWalker partitionTypes = EnumSet.of(DoCOutputType.Partition.sample); + /** + * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output. + */ @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) boolean includeDeletions = false; @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) boolean ignoreDeletionSites = false; + + /** + * Path to the RefSeq file for use in aggregating coverage statistics over genes + */ @Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false) File refSeqGeneList = null; + /** + * The format of the output file + */ @Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false) String outputFormat = "rtable"; + /** + * A coverage threshold for summarizing (e.g. % bases >= CT for each sample) + */ @Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false) int[] coverageThresholds = {15}; @@ -966,4 +1003,4 @@ class CoveragePartitioner { public Map> getIdentifiersByType() { return identifiersByType; } -} \ No newline at end of file +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 178b4c177..01e8cd321 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -39,9 +39,6 @@ import java.util.List; * reasons why the site may fail validation (nearby variation, for example). *

* - * If any of the command-line arguments for this tool are not clear to you, - * please email chartl at broadinstitute dot org and he will gladly explain them in more detail. - * *

Input

*

* Requires a VCF containing alleles to design amplicons towards, a VCF of variants to mask out of the amplicons, and an @@ -96,20 +93,30 @@ import java.util.List; */ @Requires(value={DataSource.REFERENCE}) public class ValidationAmplicons extends RodWalker { + /** + * A Table-formatted file listing amplicon contig, start, stop, and a name for the amplicon (or probe) + */ @Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+ "intervals surrounding the probe sites amplicons should be designed for", required=true) RodBinding probeIntervals; - + /** + * A VCF file containing the bi-allelic sites for validation. Filtered records will prompt a warning, and will be flagged as filtered in the output fastq. + */ @Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true) RodBinding validateAlleles; - + /** + * A VCF file containing variants to be masked. A mask variant overlapping a validation site will be ignored at the validation site. + */ @Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true) RodBinding maskAlleles; - @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) boolean lowerCaseSNPs = false; + /** + * BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased. + * This changes the size of the k-mer used for alignment. + */ @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) int virtualPrimerSize = 20; From 2f4cf82e3ba07884714b7f4911a005162ba12989 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 10:43:53 -0400 Subject: [PATCH 13/26] VariantEval cleanup. Added VariantType Stratification -- ArrayList are List where possible -- states refactored into VariantStratifier base class (reduces many lines of duplicate code) -- Added VariantType stratification that partitions report by VariantContext.Type --- .../varianteval/VariantEvalWalker.java | 7 +-- .../stratifications/AlleleCount.java | 15 +++--- .../stratifications/AlleleFrequency.java | 16 +++--- .../varianteval/stratifications/CompRod.java | 13 +++-- .../varianteval/stratifications/Contig.java | 14 ++---- .../varianteval/stratifications/CpG.java | 10 +--- .../stratifications/Degeneracy.java | 12 ++--- .../varianteval/stratifications/EvalRod.java | 12 ++--- .../varianteval/stratifications/Filter.java | 14 ++---- .../stratifications/FunctionalClass.java | 13 ++--- .../stratifications/JexlExpression.java | 14 +++--- .../varianteval/stratifications/Novelty.java | 15 +++--- .../varianteval/stratifications/Sample.java | 27 +++++----- .../stratifications/VariantStratifier.java | 13 +++-- .../stratifications/VariantType.java | 49 +++++++++++++++++++ .../varianteval/util/VariantEvalUtils.java | 15 +++--- 16 files changed, 139 insertions(+), 120 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 613a31ed3..fe4729bdc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -122,9 +122,6 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false) protected Boolean NO_STANDARD_STRATIFICATIONS = false; - @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false) - protected Set typesToUse = null; - /** * See the -list argument to view available modules. */ @@ -317,9 +314,9 @@ public class VariantEvalWalker extends RodWalker implements Tr // find the comp final VariantContext comp = findMatchingComp(eval, compSet); - HashMap> stateMap = new HashMap>(); + HashMap> stateMap = new HashMap>(); for ( VariantStratifier vs : stratificationObjects ) { - ArrayList states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); + List states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); stateMap.put(vs, states); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 5cdea4e00..3cc22cc52 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -10,10 +10,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.List; +/** + * Stratifies the eval RODs by the allele count of the alternate allele + * + * Looks at the AC value in the INFO field, and uses that value if present. If absent, + * computes the AC from the genotypes themselves. For no AC can be computed, 0 is used. + */ public class AlleleCount extends VariantStratifier { - // needs to know the variant context - private ArrayList states = new ArrayList(); - @Override public void initialize() { List> evals = getVariantEvalWalker().getEvals(); @@ -35,11 +38,7 @@ public class AlleleCount extends VariantStratifier { getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(1); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java index 96d9f30ec..3d2dda651 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java @@ -6,11 +6,15 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies the eval RODs by the allele frequency of the alternate allele + * + * Uses a constant 0.005 frequency grid, and projects the AF INFO field value. Requires + * that AF be present in every ROD, otherwise this stratification throws an exception + */ public class AlleleFrequency extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { states = new ArrayList(); @@ -19,11 +23,7 @@ public class AlleleFrequency extends VariantStratifier { } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 9f4123589..1f31ebfa7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -6,22 +6,21 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; + +/** + * Required stratification grouping output by each comp ROD + */ public class CompRod extends VariantStratifier implements RequiredStratification { - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); for ( RodBinding rod : getVariantEvalWalker().getComps() ) states.add(rod.getName()); } - public ArrayList getAllStates() { - return states; - } - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(compName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java index e12a1ba97..c45a73231 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -5,23 +5,19 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies the evaluation by each contig in the reference sequence + */ public class Contig extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.addAll(getVariantEvalWalker().getContigNames()); states.add("all"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); if (eval != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index ff49c8ba9..539cd21ef 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; /** * CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site @@ -19,21 +20,14 @@ import java.util.ArrayList; * A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G. */ public class CpG extends VariantStratifier { - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("all"); states.add("CpG"); states.add("non_CpG"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { boolean isCpG = false; if (ref != null && ref.getBases() != null) { String fwRefBases = new String(ref.getBases()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index cc878e975..3223626c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -7,10 +7,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; +/** + * Experimental stratification by the degeneracy of an amino acid, according to VCF annotation. Not safe + */ public class Degeneracy extends VariantStratifier { - private ArrayList states; - private HashMap> degeneracies; @Override @@ -77,11 +79,7 @@ public class Degeneracy extends VariantStratifier { } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 0bfecee25..e276adc32 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -6,10 +6,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Required stratification grouping output by each eval ROD + */ public class EvalRod extends VariantStratifier implements RequiredStratification { - private ArrayList states; - @Override public void initialize() { states = new ArrayList(); @@ -17,11 +19,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification states.add(rod.getName()); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add(evalName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 3e3cbc224..aacfae993 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -5,24 +5,20 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies by the FILTER status (PASS, FAIL) of the eval records + */ public class Filter extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("called"); states.add("filtered"); states.add("raw"); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("raw"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 0de871fe6..193a65591 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -5,25 +5,22 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.List; +/** + * Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation. + */ public class FunctionalClass extends VariantStratifier { - // needs to know the variant context - private ArrayList states; - @Override public void initialize() { - states = new ArrayList(); states.add("all"); states.add("silent"); states.add("missense"); states.add("nonsense"); } - public ArrayList getAllStates() { - return states; - } - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("all"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index 59b991c4d..c0cab4534 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -6,30 +6,30 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import java.util.List; import java.util.ArrayList; import java.util.Set; +/** + * Stratifies the eval RODs by user-supplied JEXL expressions + * + * See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details + */ public class JexlExpression extends VariantStratifier implements StandardStratification { // needs to know the jexl expressions private Set jexlExpressions; - private ArrayList states; @Override public void initialize() { jexlExpressions = getVariantEvalWalker().getJexlExpressions(); - states = new ArrayList(); states.add("none"); for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { states.add(jexlExpression.name); } } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { ArrayList relevantStates = new ArrayList(); relevantStates.add("none"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index a3810a4c0..77d98d33b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -7,32 +7,31 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; +/** + * Stratifies by whether a site in in the list of known RODs (e.g., dbsnp by default) + */ public class Novelty extends VariantStratifier implements StandardStratification { // needs the variant contexts and known names private List> knowns; - final private ArrayList states = new ArrayList(Arrays.asList("all", "known", "novel")); @Override public void initialize() { + states = new ArrayList(Arrays.asList("all", "known", "novel")); knowns = getVariantEvalWalker().getKnowns(); } - public ArrayList getAllStates() { - return states; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if (tracker != null && eval != null) { final Collection knownComps = tracker.getValues(knowns, ref.getLocus()); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval if ( eval.getType() == c.getType() ) { - return new ArrayList(Arrays.asList("all", "known")); + return Arrays.asList("all", "known"); } } } - return new ArrayList(Arrays.asList("all", "novel")); + return Arrays.asList("all", "novel"); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index b714fa291..c697b5b7a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -4,26 +4,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +/** + * Stratifies the eval RODs by each sample in the eval ROD. + * + * This allows the system to analyze each sample separately. Since many evaluations + * only consider non-reference sites, stratifying by sample results in meaningful + * calculations for CompOverlap + */ public class Sample extends VariantStratifier { - // needs the sample names - private ArrayList samples; - @Override public void initialize() { - samples = new ArrayList(); - samples.addAll(getVariantEvalWalker().getSampleNamesForStratification()); + states.addAll(getVariantEvalWalker().getSampleNamesForStratification()); } - public ArrayList getAllStates() { - return samples; - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { - ArrayList relevantStates = new ArrayList(); - relevantStates.add(sampleName); - - return relevantStates; + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + return Arrays.asList(sampleName); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index df6523207..5cae2fb15 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -6,9 +6,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; public abstract class VariantStratifier implements Comparable { private VariantEvalWalker variantEvalWalker; + protected ArrayList states = new ArrayList(); /** * @return a reference to the parent VariantEvalWalker running this stratification @@ -27,15 +30,15 @@ public abstract class VariantStratifier implements Comparable { public abstract void initialize(); - public ArrayList getAllStates() { - return new ArrayList(); - } - - public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { return null; } public int compareTo(Object o1) { return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName()); } + + public ArrayList getAllStates() { + return states; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java new file mode 100644 index 000000000..7d25498a5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Stratifies the eval variants by their type (SNP, INDEL, ETC) + */ +public class VariantType extends VariantStratifier { + @Override + public void initialize() { + for ( VariantContext.Type t : VariantContext.Type.values() ) { + states.add(t.toString()); + } + } + + public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { + return eval == null ? Collections.emptyList() : Arrays.asList(eval.getType().toString()); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index f31dd9f9f..92e7c6554 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -266,10 +266,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested sample */ public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { - ArrayList sampleNames = new ArrayList(); - sampleNames.add(sampleName); - - return getSubsetOfVariantContext(vc, sampleNames); + return getSubsetOfVariantContext(vc, Arrays.asList(sampleName)); } /** @@ -371,12 +368,12 @@ public class VariantEvalUtils { * @param stateKeys all the state keys * @return a list of state keys */ - public ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { + public ArrayList initializeStateKeys(HashMap> stateMap, Stack>> stateStack, StateKey stateKey, ArrayList stateKeys) { if (stateStack == null) { - stateStack = new Stack>>(); + stateStack = new Stack>>(); for (VariantStratifier vs : stateMap.keySet()) { - HashMap> oneSetOfStates = new HashMap>(); + HashMap> oneSetOfStates = new HashMap>(); oneSetOfStates.put(vs, stateMap.get(vs)); stateStack.add(oneSetOfStates); @@ -384,10 +381,10 @@ public class VariantEvalUtils { } if (!stateStack.isEmpty()) { - Stack>> newStateStack = new Stack>>(); + Stack>> newStateStack = new Stack>>(); newStateStack.addAll(stateStack); - HashMap> oneSetOfStates = newStateStack.pop(); + HashMap> oneSetOfStates = newStateStack.pop(); VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); for (String state : oneSetOfStates.get(vs)) { From a1920397e8c4328f8c09006e5982afc2381ac71c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 12:18:11 -0400 Subject: [PATCH 14/26] Major bugfix for per sample VariantEval -- per sample stratification was not being calculated correctly. The alt allele was always remaining, even if the genotype of the sample was hom-ref. Although conceptually fine, this breaks the assumptions of all of the eval modules, so per sample stratifications actually included all variants for everything. Eric is going to fix the system in general, so this commit may break the build. --- .../sting/gatk/walkers/varianteval/util/VariantEvalUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 92e7c6554..3cc039141 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -277,7 +277,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested samples */ public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection sampleNames) { - VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles()); + VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values()); HashMap newAts = new HashMap(vcsub.getAttributes()); From 6ff432e1f24860e8821e9f55fe71a0e470dce202 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 12:50:17 -0400 Subject: [PATCH 15/26] BugFix for TF argument to VariantEval, actually making it work properly --- .../varianteval/VariantEvalWalker.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index fe4729bdc..65e3d3e5a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; +import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; @@ -24,6 +25,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -224,12 +226,6 @@ public class VariantEvalWalker extends RodWalker implements Tr } sampleNamesForStratification.add(ALL_SAMPLE_NAME); - // Initialize select expressions - for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { - SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); - jexlExpressions.add(sjexl); - } - // Add select expressions for anything in the tranches file if ( TRANCHE_FILENAME != null ) { // we are going to build a few select names automatically from the tranches file @@ -240,16 +236,27 @@ public class VariantEvalWalker extends RodWalker implements Tr } } + // Initialize select expressions + for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { + SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); + jexlExpressions.add(sjexl); + } + // Initialize the set of stratifications and evaluations to use stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); + boolean usingJEXL = false; for ( VariantStratifier vs : getStratificationObjects() ) { if ( vs.getClass().getSimpleName().equals("Filter") ) byFilterIsEnabled = true; else if ( vs.getClass().getSimpleName().equals("Sample") ) perSampleIsEnabled = true; + usingJEXL = usingJEXL || vs.getClass().equals(JexlExpression.class); } + if ( TRANCHE_FILENAME != null && ! usingJEXL ) + throw new UserException.BadArgumentValue("tf", "Requires the JexlExpression ST to enabled"); + // Initialize the evaluation contexts evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); From d23d62049439870ea33fb4d1759c2349f2ad154d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 12:52:33 -0400 Subject: [PATCH 16/26] Pushing traversal engine timer start to as close to actual start as possible -- Should make initial timings more accurate --- .../gatk/executive/LinearMicroScheduler.java | 2 +- .../sting/gatk/executive/ShardTraverser.java | 1 + .../sting/gatk/traversals/TraversalEngine.java | 15 ++++++++++----- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 65ff27497..09ab4bd44 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -44,7 +44,6 @@ public class LinearMicroScheduler extends MicroScheduler { * @param shardStrategy A strategy for sharding the data. */ public Object execute(Walker walker, ShardStrategy shardStrategy) { - traversalEngine.startTimers(); walker.initialize(); Accumulator accumulator = Accumulator.create(engine,walker); @@ -54,6 +53,7 @@ public class LinearMicroScheduler extends MicroScheduler { if ( done || shard == null ) // we ran out of shards that aren't owned break; + traversalEngine.startTimersIfNecessary(); if(shard.getShardType() == Shard.ShardType.LOCUS) { LocusWalker lWalker = (LocusWalker)walker; WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index 6136bd68d..2b6488ada 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -57,6 +57,7 @@ public class ShardTraverser implements Callable { public Object call() { try { + traversalEngine.startTimersIfNecessary(); long startTime = System.currentTimeMillis(); Object accumulator = walker.reduceInit(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 89a179d0e..dc6ab240e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -115,7 +115,7 @@ public abstract class TraversalEngine,Provide LinkedList history = new LinkedList(); /** We use the SimpleTimer to time our run */ - private SimpleTimer timer = new SimpleTimer("Traversal"); + private SimpleTimer timer = null; // How long can we go without printing some progress info? private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000; @@ -209,11 +209,16 @@ public abstract class TraversalEngine,Provide } } /** - * Should be called to indicate that we're going to process records and the timer should start ticking + * Should be called to indicate that we're going to process records and the timer should start ticking. This + * function should be called right before any traversal work is done, to avoid counting setup costs in the + * processing costs and inflating the estimated runtime. */ - public void startTimers() { - timer.start(); - lastProgressPrintTime = timer.currentTime(); + public void startTimersIfNecessary() { + if ( timer == null ) { + timer = new SimpleTimer("Traversal"); + timer.start(); + lastProgressPrintTime = timer.currentTime(); + } } /** From 7e9e20fed0ad5d9f2491f782de3c3850ad341a57 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 12:54:52 -0400 Subject: [PATCH 17/26] Forgot to delete previous call --- .../sting/gatk/executive/HierarchicalMicroScheduler.java | 1 - 1 file changed, 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 59fb4aa9e..3b9e35311 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar if (!( walker instanceof TreeReducible )) throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers"); - traversalEngine.startTimers(); ReduceTree reduceTree = new ReduceTree(this); initializeWalker(walker); From 430da2344609582d8de24edf52ed34cd2a426607 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 13:13:07 -0400 Subject: [PATCH 18/26] At least 2 minutes must pass before a status message is printed, further stabilizing time estimates --- .../broadinstitute/sting/gatk/traversals/TraversalEngine.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index dc6ab240e..27fd173cb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -121,6 +121,7 @@ public abstract class TraversalEngine,Provide private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000; private int printProgressCheckCounter = 0; private long lastProgressPrintTime = -1; // When was the last time we printed progress log? + private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0; private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0; @@ -229,7 +230,8 @@ public abstract class TraversalEngine,Provide * @return true if the maximum interval (in millisecs) has passed since the last printing */ private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) { - return (curTime - lastPrintTime) > printFreq; + long elapsed = curTime - lastPrintTime; + return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS; } /** From 5f22ef9a8c4b5fdf04b5730dc5d27ffa63c6f73c Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 7 Sep 2011 13:21:11 -0400 Subject: [PATCH 19/26] Added missing javadoc info to Beagle arguments --- .../beagle/BeagleOutputToVCFWalker.java | 21 +++++++++++++++++++ .../beagle/ProduceBeagleInputWalker.java | 3 +++ 2 files changed, 24 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 60f0fcb0a..880dba5d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -79,24 +79,45 @@ public class BeagleOutputToVCFWalker extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + /** + * If this argument is present, the original allele frequencies and counts from this vcf are added as annotations ACH,AFH and ANH. at each record present in this vcf + */ @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) public RodBinding comp; + + /** + * This required argument is used to annotate each site in the vcf INFO field with R2 annotation. Will be NaN if Beagle determined there are no variant samples. + */ @Input(fullName="beagleR2", shortName = "beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true) public RodBinding beagleR2; + /** + * These values will populate the GL field for each sample and contain the posterior probability of each genotype given the data after phasing and imputation. + */ @Input(fullName="beagleProbs", shortName = "beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true) public RodBinding beagleProbs; + /** + * By default, all genotypes will be marked in the VCF as "phased", using the "|" separator after Beagle. + */ @Input(fullName="beaglePhased", shortName = "beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true) public RodBinding beaglePhased; @Output(doc="VCF File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; + /** + * If this argument is absent, and if Beagle determines that there is no sample in a site that has a variant genotype, the site will be marked as filtered (Default behavior). + * If the argument is present, the site won't be marked as filtered under this condition even if there are no variant genotypes. + */ @Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. Useful for imputing a sample's genotypes from a reference panel" ,required=false) public boolean DONT_FILTER_MONOMORPHIC_SITES = false; + /** + * Value between 0 and 1. If the probability of getting a genotype correctly (based on the posterior genotype probabilities and the actual genotype) is below this threshold, + * a genotype will be substitute by a no-call. + */ @Argument(fullName="no" + "call_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false) private double noCallThreshold = 0.0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 07793fd7b..87695077d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -112,6 +112,9 @@ public class ProduceBeagleInputWalker extends RodWalker { @Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false) VCFWriter bootstrapVCFOutput = null; + /** + * If sample gender is known, this flag should be set to true to ensure that Beagle treats male Chr X properly. + */ @Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false) public boolean CHECK_IS_MALE_ON_CHR_X = false; From ee9d59955857f3b3f0346c027251dc16592dde71 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 7 Sep 2011 13:31:20 -0400 Subject: [PATCH 20/26] Just cleaning up clean up old commented code from tha data processing pipeline. --- .../qscripts/DataProcessingPipeline.scala | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 2a135496d..f97ce4884 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -37,11 +37,6 @@ class DataProcessingPipeline extends QScript { * Optional Parameters ****************************************************************************/ - -// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false) -// var sortSamJar: File = _ -// - @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) var indels: List[File] = List() @@ -132,24 +127,6 @@ class DataProcessingPipeline extends QScript { } } return sampleTable.toMap - -// println("\n\n*** INPUT FILES ***\n") -// // Creating one file for each sample in the dataset -// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File] -// for ((sample, flist) <- sampleTable) { -// -// println(sample + ":") -// for (f <- flist) -// println (f) -// println() -// -// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list") -// sampleBamFiles(sample) = sampleFileName -// //add(writeList(flist, sampleFileName)) -// } -// println("*** INPUT FILES ***\n\n") -// -// return sampleBamFiles.toMap } // Rebuilds the Read Group string to give BWA From 3a04955a3085cd87bfec758eb144e78d5bf19b20 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 7 Sep 2011 14:01:42 -0400 Subject: [PATCH 21/26] We already had isPolymorphic and isMonomorphic in the VariantContext, but the implementation was incorrect for many edge cases (e.g. sites-only files, sites with samples who were no-called). Fixing. Moving on to VE now. --- .../sting/utils/variantcontext/VariantContext.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 673fe4529..699133e38 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -983,7 +983,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return true if it's monomorphic */ public boolean isMonomorphic() { - return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); + return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples()); } /** From 9127849f5d2871621945a4f005af91dc7cfa8dd9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 14:54:10 -0400 Subject: [PATCH 23/26] BugFix for unit test --- .../sting/gatk/traversals/TraverseReadsUnitTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index c0d32a05b..7f4d96add 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -127,6 +127,7 @@ public class TraverseReadsUnitTest extends BaseTest { Object accumulator = countReadWalker.reduceInit(); while (shardStrategy.hasNext()) { + traversalEngine.startTimersIfNecessary(); Shard shard = shardStrategy.next(); if (shard == null) { From aa9e32f2f115a81b643b52317d40fc46e79195ef Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 7 Sep 2011 15:48:06 -0400 Subject: [PATCH 24/26] Reverting Mark's previous commit as per the open discussion. Now the eval modules check isPolymorphic() before accruing stats when appropriate. Fixed the IndelLengthHistogram module not to error out if the indel isn't simple (that would have been bad). Only integration test that needed to be updated was the tranches one based on a separate commit from Mark. --- .../varianteval/evaluators/CompOverlap.java | 2 +- .../varianteval/evaluators/CountVariants.java | 3 +- .../evaluators/IndelLengthHistogram.java | 15 +-- .../evaluators/IndelStatistics.java | 2 +- .../evaluators/SimpleMetricsByAC.java | 2 +- .../evaluators/ThetaVariantEvaluator.java | 103 +++++++++--------- .../evaluators/TiTvVariantEvaluator.java | 2 +- .../evaluators/ValidationReport.java | 5 +- .../evaluators/VariantQualityScore.java | 2 +- .../varianteval/util/VariantEvalUtils.java | 2 +- .../VariantEvalIntegrationTest.java | 2 +- 11 files changed, 69 insertions(+), 71 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 2ea64c49c..5ccacac37 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { } public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean evalIsGood = eval != null && eval.isVariant(); + boolean evalIsGood = eval != null && eval.isPolymorphic(); boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType()); if (compIsGood) nCompVariants++; // count the number of comp events diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 59ef3d992..2913c97a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval { // So in order to maintain consistency with the previous implementation (and the intention of the original author), I've // added in a proxy check for monomorphic status here. // Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call. - if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) { + if ( vc1.isMonomorphic() ) { nRefLoci++; } else { switch (vc1.getType()) { case NO_VARIATION: + // shouldn't get here break; case SNP: nVariantLoci++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 35fffd815..ffe7c185f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator { public int getComparisonOrder() { return 1; } // need only the evals public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( ! vc1.isBiallelic() && vc1.isIndel() ) { - //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored."); - return vc1.toString(); // biallelic sites are output - } - if ( vc1.isIndel() ) { + if ( vc1.isIndel() && vc1.isPolymorphic() ) { + + if ( ! vc1.isBiallelic() ) { + //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored."); + return vc1.toString(); // biallelic sites are output + } + + // only count simple insertions/deletions, not complex indels if ( vc1.isSimpleInsertion() ) { indelHistogram.update(vc1.getAlternateAllele(0).length()); } else if ( vc1.isSimpleDeletion() ) { indelHistogram.update(-vc1.getReference().length()); - } else { - throw new ReviewedStingException("Indel type that is not insertion or deletion."); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java index fc347339d..f70e6c2de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java @@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (eval != null ) { + if (eval != null && eval.isPolymorphic()) { if ( indelStats == null ) { indelStats = new IndelStats(eval); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java index d466645ea..203c15a85 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java @@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval } } - if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) { + if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) { metrics.incrValue(eval); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index ec43cbd55..e51623c3c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator { } public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) { + if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) { return null; //no interesting sites } - if (vc.hasGenotypes()) { + //this maps allele to a count + ConcurrentMap alleleCounts = new ConcurrentHashMap(); - //this maps allele to a count - ConcurrentMap alleleCounts = new ConcurrentHashMap(); + int numHetsHere = 0; + float numGenosHere = 0; + int numIndsHere = 0; - int numHetsHere = 0; - float numGenosHere = 0; - int numIndsHere = 0; + for (Genotype genotype : vc.getGenotypes().values()) { + numIndsHere++; + if (!genotype.isNoCall()) { + //increment stats for heterozygosity + if (genotype.isHet()) { + numHetsHere++; + } - for (Genotype genotype : vc.getGenotypes().values()) { - numIndsHere++; - if (!genotype.isNoCall()) { - //increment stats for heterozygosity - if (genotype.isHet()) { - numHetsHere++; - } + numGenosHere++; + //increment stats for pairwise mismatches - numGenosHere++; - //increment stats for pairwise mismatches - - for (Allele allele : genotype.getAlleles()) { - if (allele.isNonNull() && allele.isCalled()) { - String alleleString = allele.toString(); - alleleCounts.putIfAbsent(alleleString, 0); - alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); - } + for (Allele allele : genotype.getAlleles()) { + if (allele.isNonNull() && allele.isCalled()) { + String alleleString = allele.toString(); + alleleCounts.putIfAbsent(alleleString, 0); + alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1); } } } - if (numGenosHere > 0) { - //only if have one called genotype at least - this.numSites++; + } + if (numGenosHere > 0) { + //only if have one called genotype at least + this.numSites++; - this.totalHet += numHetsHere / numGenosHere; + this.totalHet += numHetsHere / numGenosHere; - //compute based on num sites - float harmonicFactor = 0; - for (int i = 1; i <= numIndsHere; i++) { - harmonicFactor += 1.0 / i; - } - this.thetaRegionNumSites += 1.0 / harmonicFactor; + //compute based on num sites + float harmonicFactor = 0; + for (int i = 1; i <= numIndsHere; i++) { + harmonicFactor += 1.0 / i; + } + this.thetaRegionNumSites += 1.0 / harmonicFactor; - //now compute pairwise mismatches - float numPairwise = 0; - float numDiffs = 0; - for (String allele1 : alleleCounts.keySet()) { - int allele1Count = alleleCounts.get(allele1); + //now compute pairwise mismatches + float numPairwise = 0; + float numDiffs = 0; + for (String allele1 : alleleCounts.keySet()) { + int allele1Count = alleleCounts.get(allele1); - for (String allele2 : alleleCounts.keySet()) { - if (allele1.compareTo(allele2) < 0) { - continue; - } - if (allele1 .compareTo(allele2) == 0) { - numPairwise += allele1Count * (allele1Count - 1) * .5; + for (String allele2 : alleleCounts.keySet()) { + if (allele1.compareTo(allele2) < 0) { + continue; + } + if (allele1 .compareTo(allele2) == 0) { + numPairwise += allele1Count * (allele1Count - 1) * .5; - } - else { - int allele2Count = alleleCounts.get(allele2); - numPairwise += allele1Count * allele2Count; - numDiffs += allele1Count * allele2Count; - } + } + else { + int allele2Count = alleleCounts.get(allele2); + numPairwise += allele1Count * allele2Count; + numDiffs += allele1Count * allele2Count; } } + } - if (numPairwise > 0) { - this.totalAvgDiffs += numDiffs / numPairwise; - } + if (numPairwise > 0) { + this.totalAvgDiffs += numDiffs / numPairwise; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index be957abd7..1feb37e01 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv } public void updateTiTv(VariantContext vc, boolean updateStandard) { - if (vc != null && vc.isSNP() && vc.isBiallelic()) { + if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) { if (VariantContextUtils.isTransition(vc)) { if (updateStandard) nTiInComp++; else nTi++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index 9c331b577..307b4f684 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { public SiteStatus calcSiteStatus(VariantContext vc) { if ( vc == null ) return SiteStatus.NO_CALL; if ( vc.isFiltered() ) return SiteStatus.FILTERED; - if ( ! vc.isVariant() ) return SiteStatus.MONO; + if ( vc.isMonomorphic() ) return SiteStatus.MONO; + if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { int ac = 0; @@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { else ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY); return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO; - } else if ( vc.hasGenotypes() ) { - return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO; } else { return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do //return SiteStatus.NO_CALL; // we can't figure out what to do diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index b6ad55b18..263227938 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { final String interesting = null; - if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) + if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) if( titvStats == null ) { titvStats = new TiTvStats(); } titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 3cc039141..92e7c6554 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -277,7 +277,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested samples */ public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection sampleNames) { - VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values()); + VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles()); HashMap newAts = new HashMap(vcsub.getAttributes()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 3503a2353..7b6d13223 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -264,7 +264,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testTranches() { String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952")); executeTestParallel("testTranches",spec); } From 2ded0277628e97e0363b8af051580a87786f0459 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 7 Sep 2011 16:09:24 -0400 Subject: [PATCH 25/26] Removed dysfunctional tranches support from VariantEval --- .../walkers/varianteval/VariantEvalWalker.java | 18 ------------------ .../VariantEvalIntegrationTest.java | 2 +- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 65e3d3e5a..0d09b7033 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -149,9 +149,6 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; - @Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false) - private String TRANCHE_FILENAME = null; - @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) private File ancestralAlignmentsFile = null; @@ -226,16 +223,6 @@ public class VariantEvalWalker extends RodWalker implements Tr } sampleNamesForStratification.add(ALL_SAMPLE_NAME); - // Add select expressions for anything in the tranches file - if ( TRANCHE_FILENAME != null ) { - // we are going to build a few select names automatically from the tranches file - for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) { - logger.info("Adding select for all variant above the pCut of : " + t); - SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod)); - SELECT_NAMES.add(String.format("TS-%.2f", t.ts)); - } - } - // Initialize select expressions for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) { SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp); @@ -245,18 +232,13 @@ public class VariantEvalWalker extends RodWalker implements Tr // Initialize the set of stratifications and evaluations to use stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); - boolean usingJEXL = false; for ( VariantStratifier vs : getStratificationObjects() ) { if ( vs.getClass().getSimpleName().equals("Filter") ) byFilterIsEnabled = true; else if ( vs.getClass().getSimpleName().equals("Sample") ) perSampleIsEnabled = true; - usingJEXL = usingJEXL || vs.getClass().equals(JexlExpression.class); } - if ( TRANCHE_FILENAME != null && ! usingJEXL ) - throw new UserException.BadArgumentValue("tf", "Requires the JexlExpression ST to enabled"); - // Initialize the evaluation contexts evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 7b6d13223..6c4393d6a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -261,7 +261,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { return String.format("%s -select '%s' -selectName %s", cmd, select, name); } - @Test + @Test(enabled = false) // no longer supported in the GATK public void testTranches() { String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952")); From 9604fb2ba34d619d96459938e32d757758216c91 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 7 Sep 2011 16:49:16 -0400 Subject: [PATCH 26/26] Necessary but not sufficient step to fix GenotypeGivenAlleles mode in UG which is now busted --- .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 06455df6d..b1332bdf9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -39,10 +39,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; import java.util.*; @@ -239,7 +236,8 @@ public class UnifiedGenotyperEngine { VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles); if ( vcInput == null ) return null; - vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()); + vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase()); + } else { // deal with bad/non-standard reference bases if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )