Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/ebanks/Sting_rodrefactor into rodrewrite
This commit is contained in:
commit
a02636a1ac
|
|
@ -12,7 +12,9 @@ if ( onCMDLine ) {
|
||||||
inputFileName = args[1]
|
inputFileName = args[1]
|
||||||
outputPDF = args[2]
|
outputPDF = args[2]
|
||||||
} else {
|
} else {
|
||||||
inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||||
|
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||||
|
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||||
outputPDF = NA
|
outputPDF = NA
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -113,11 +115,22 @@ plotGroup <- function(groupTable) {
|
||||||
textplot(as.data.frame(sum), show.rownames=F)
|
textplot(as.data.frame(sum), show.rownames=F)
|
||||||
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
|
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
|
||||||
|
|
||||||
|
# histogram of job times by groupAnnotations
|
||||||
|
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
|
||||||
|
# todo -- how do we group by annotations?
|
||||||
|
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
|
||||||
|
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
|
||||||
|
p <- p + opts(title=paste("Job runtime histogram for", name))
|
||||||
|
print(p)
|
||||||
|
}
|
||||||
|
|
||||||
# as above, but averaging over all iterations
|
# as above, but averaging over all iterations
|
||||||
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
|
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
|
||||||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
if ( dim(sub)[1] > 1 ) {
|
||||||
textplot(as.data.frame(sum), show.rownames=F)
|
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
||||||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
textplot(as.data.frame(sum), show.rownames=F)
|
||||||
|
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# print out some useful basic information
|
# print out some useful basic information
|
||||||
|
|
@ -146,7 +159,7 @@ plotJobsGantt(gatkReportData, T)
|
||||||
plotJobsGantt(gatkReportData, F)
|
plotJobsGantt(gatkReportData, F)
|
||||||
plotProgressByTime(gatkReportData)
|
plotProgressByTime(gatkReportData)
|
||||||
for ( group in gatkReportData ) {
|
for ( group in gatkReportData ) {
|
||||||
plotGroup(group)
|
plotGroup(group)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ! is.na(outputPDF) ) {
|
if ( ! is.na(outputPDF) ) {
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,11 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||||
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
|
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
||||||
|
* distinguish between PCR errors vs. sequencing errors. The practical implication for this value is that it
|
||||||
|
* effectively acts as a cap on the base qualities.
|
||||||
|
*/
|
||||||
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
||||||
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
|
@Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
|
||||||
public Set<String> sampleExpressions ;
|
public Set<String> sampleExpressions ;
|
||||||
|
|
||||||
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
|
@Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
|
||||||
public Set<File> sampleFiles;
|
public Set<File> sampleFiles;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -226,7 +226,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
/**
|
/**
|
||||||
* Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded.
|
* Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
|
@Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
|
||||||
public Set<File> XLsampleFiles = new HashSet<File>(0);
|
public Set<File> XLsampleFiles = new HashSet<File>(0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -252,7 +252,8 @@ public class ClippingOp {
|
||||||
if (start == 0 && stop == read.getReadLength() -1)
|
if (start == 0 && stop == read.getReadLength() -1)
|
||||||
return new SAMRecord(read.getHeader());
|
return new SAMRecord(read.getHeader());
|
||||||
|
|
||||||
CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
|
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||||
|
CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop);
|
||||||
|
|
||||||
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
// the cigar may force a shift left or right (or both) in case we are left with insertions
|
||||||
// starting or ending the read after applying the hard clip on start/stop.
|
// starting or ending the read after applying the hard clip on start/stop.
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,7 @@ public class PileupElement {
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
private Integer getReducedReadQualityTagValue() {
|
private Integer getReducedReadQualityTagValue() {
|
||||||
return (Integer)getRead().getAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
|
return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isReducedRead() {
|
public boolean isReducedRead() {
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,10 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
// because some values can be null, we don't want to duplicate effort
|
// because some values can be null, we don't want to duplicate effort
|
||||||
private boolean retrievedReadGroup = false;
|
private boolean retrievedReadGroup = false;
|
||||||
|
|
||||||
|
/** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */
|
||||||
|
private boolean lookedUpReducedReadQuality = false;
|
||||||
|
private Integer reducedReadQuality;
|
||||||
|
|
||||||
// These temporary attributes were added here to make life easier for
|
// These temporary attributes were added here to make life easier for
|
||||||
// certain algorithms by providing a way to label or attach arbitrary data to
|
// certain algorithms by providing a way to label or attach arbitrary data to
|
||||||
// individual GATKSAMRecords.
|
// individual GATKSAMRecords.
|
||||||
|
|
@ -338,7 +342,17 @@ public class GATKSAMRecord extends SAMRecord {
|
||||||
|
|
||||||
public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); }
|
public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); }
|
||||||
|
|
||||||
public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); }
|
public Integer getIntegerAttribute(final String tag) {
|
||||||
|
if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) {
|
||||||
|
if ( ! lookedUpReducedReadQuality ) {
|
||||||
|
lookedUpReducedReadQuality = true;
|
||||||
|
reducedReadQuality = mRecord.getIntegerAttribute(tag);
|
||||||
|
}
|
||||||
|
return reducedReadQuality;
|
||||||
|
} else {
|
||||||
|
return mRecord.getIntegerAttribute(tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); }
|
public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import net.sf.samtools.SAMFileReader
|
||||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||||
|
|
||||||
import org.broadinstitute.sting.queue.util.QScriptUtils
|
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||||
import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction}
|
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||||
|
|
||||||
class DataProcessingPipeline extends QScript {
|
class DataProcessingPipeline extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
@ -31,7 +31,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
var reference: File = _
|
var reference: File = _
|
||||||
|
|
||||||
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
|
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
|
||||||
var dbSNP: File = _
|
var dbSNP: List[File] = List()
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Optional Parameters
|
* Optional Parameters
|
||||||
|
|
@ -43,7 +43,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
//
|
//
|
||||||
|
|
||||||
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
||||||
var indels: File = _
|
var indels: List[File] = List()
|
||||||
|
|
||||||
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||||
var bwaPath: File = _
|
var bwaPath: File = _
|
||||||
|
|
@ -159,7 +159,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
for (rg <- readGroups) {
|
for (rg <- readGroups) {
|
||||||
val intermediateInBam: File = if (index == readGroups.length) { inBam } else { swapExt(outBam, ".bam", index+1 + "-rg.bam") }
|
val intermediateInBam: File = if (index == readGroups.length) { inBam } else { swapExt(outBam, ".bam", index+1 + "-rg.bam") }
|
||||||
val intermediateOutBam: File = if (index > 1) {swapExt(outBam, ".bam", index + "-rg.bam") } else { outBam}
|
val intermediateOutBam: File = if (index > 1) {swapExt(outBam, ".bam", index + "-rg.bam") } else { outBam}
|
||||||
val readGroup = new ReadGroup(rg.getReadGroupId, rg.getPlatform, rg.getLibrary, rg.getPlatformUnit, rg.getSample, rg.getSequencingCenter, rg.getDescription)
|
val readGroup = new ReadGroup(rg.getReadGroupId, rg.getLibrary, rg.getPlatform, rg.getPlatformUnit, rg.getSample, rg.getSequencingCenter, rg.getDescription)
|
||||||
add(addReadGroup(intermediateInBam, intermediateOutBam, readGroup))
|
add(addReadGroup(intermediateInBam, intermediateOutBam, readGroup))
|
||||||
index = index - 1
|
index = index - 1
|
||||||
}
|
}
|
||||||
|
|
@ -321,9 +321,9 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.input_file = inBams
|
this.input_file = inBams
|
||||||
this.out = outIntervals
|
this.out = outIntervals
|
||||||
this.mismatchFraction = 0.0
|
this.mismatchFraction = 0.0
|
||||||
this.known :+= qscript.dbSNP
|
this.known ++= qscript.dbSNP
|
||||||
if (indels != null)
|
if (indels != null)
|
||||||
this.known :+= qscript.indels
|
this.known ++= qscript.indels
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = nContigs
|
||||||
this.analysisName = queueLogDir + outIntervals + ".target"
|
this.analysisName = queueLogDir + outIntervals + ".target"
|
||||||
this.jobName = queueLogDir + outIntervals + ".target"
|
this.jobName = queueLogDir + outIntervals + ".target"
|
||||||
|
|
@ -333,9 +333,9 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.input_file = inBams
|
this.input_file = inBams
|
||||||
this.targetIntervals = tIntervals
|
this.targetIntervals = tIntervals
|
||||||
this.out = outBam
|
this.out = outBam
|
||||||
this.known :+= qscript.dbSNP
|
this.known ++= qscript.dbSNP
|
||||||
if (qscript.indels != null)
|
if (qscript.indels != null)
|
||||||
this.known :+= qscript.indels
|
this.known ++= qscript.indels
|
||||||
this.consensusDeterminationModel = cleanModelEnum
|
this.consensusDeterminationModel = cleanModelEnum
|
||||||
this.compress = 0
|
this.compress = 0
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = nContigs
|
||||||
|
|
@ -344,7 +344,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
}
|
}
|
||||||
|
|
||||||
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
||||||
this.knownSites :+= qscript.dbSNP
|
this.knownSites ++= qscript.dbSNP
|
||||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||||
this.input_file :+= inBam
|
this.input_file :+= inBam
|
||||||
this.recal_file = outRecalFile
|
this.recal_file = outRecalFile
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.queue.qscripts
|
package org.broadinstitute.sting.queue.qscripts
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Hidden
|
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport
|
||||||
|
|
@ -70,6 +69,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
val goldStandardClusterFile = new File(goldStandardName + ".clusters")
|
val goldStandardClusterFile = new File(goldStandardName + ".clusters")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val b37_decoy = new File("/humgen/1kg/reference/human_g1k_v37_decoy.fasta")
|
||||||
val hg19 = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
val hg19 = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||||
val hg18 = new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")
|
val hg18 = new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")
|
||||||
val b36 = new File("/humgen/1kg/reference/human_b36_both.fasta")
|
val b36 = new File("/humgen/1kg/reference/human_b36_both.fasta")
|
||||||
|
|
@ -124,6 +124,14 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
||||||
|
"WExTrioDecoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
|
||||||
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
|
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
|
||||||
|
"WGSTrioDecoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
|
||||||
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
||||||
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
|
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue