diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e7f89bf08..ae419a5c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -156,7 +156,7 @@ public class UnifiedArgumentCollection { public boolean OUTPUT_DEBUG_INDEL_INFO = false; @Hidden - @Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false) + @Argument(fullName = "dovit", shortName = "dovit", doc = "Perform full Viterbi calculation when evaluating the HMM", required = false) public boolean dovit = false; @Hidden diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 55450486b..2d7969230 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -274,7 +274,7 @@ public class PairHMMIndelErrorModel { this.doViterbi = dovit; } - public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) { + public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) { this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob @@ -754,7 +754,7 @@ public class PairHMMIndelErrorModel { // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) if (indelLikelihoodMap.containsKey(p)) { - HashMap el = indelLikelihoodMap.get(p); + HashMap el = indelLikelihoodMap.get(p); int j=0; for (Allele a: haplotypeMap.keySet()) { readLikelihoods[readIdx][j++] = el.get(a); @@ -1055,7 +1055,6 @@ public class PairHMMIndelErrorModel { genotypeLikelihoods[i] -= maxElement; return genotypeLikelihoods; - } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 888dc1e98..1fde8879b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -209,6 +209,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati /** * the complete constructor. Makes a complete VariantContext from its arguments + * This is the only constructor that is able to create indels! DO NOT USE THE OTHER ONES. * * @param source source * @param contig the contig @@ -293,7 +294,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati } /** - * Create a new variant context without genotypes and no Perror, no filters, and no attributes + * Create a new variant context with genotypes but without Perror, filters, and attributes * * @param source source * @param contig the contig diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala index accc5c7f0..6947d4398 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala @@ -4,9 +4,9 @@ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.util.QScriptUtils import net.sf.samtools.SAMFileHeader.SortOrder -import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups} import org.broadinstitute.sting.utils.exceptions.UserException import org.broadinstitute.sting.commandline.Hidden +import org.broadinstitute.sting.queue.extensions.picard.{ReorderSam, SortSam, AddOrReplaceReadGroups} /** * Created by IntelliJ IDEA. @@ -16,7 +16,7 @@ import org.broadinstitute.sting.commandline.Hidden */ -class RecalibrateBaseQualities extends QScript { +class PacbioProcessingPipeline extends QScript { @Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true) var input: File = _ @@ -39,13 +39,16 @@ class RecalibrateBaseQualities extends QScript { @Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ + @Input(doc="Input is a BLASR generated BAM file", shortName = "blasr", fullName="blasr_bam", required=false) + var BLASR_BAM: Boolean = false + @Hidden @Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false) var dbq: Int = 20 - - - + @Hidden + @Input(shortName="bwastring", required=false) + var bwastring: String = "" val queueLogDir: String = ".qlog/" @@ -57,8 +60,6 @@ class RecalibrateBaseQualities extends QScript { var USE_BWA: Boolean = false - println("DEBUG: processing " + file + "\nDEBUG: name -- " + file.getName) - if (file.endsWith(".fasta") || file.endsWith(".fq")) { if (bwaPath == null) { throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA"); @@ -69,28 +70,34 @@ class RecalibrateBaseQualities extends QScript { // FASTA -> BAM steps val alignedSam: File = file.getName + ".aligned.sam" val sortedBam: File = swapExt(alignedSam, ".sam", ".bam") - val qualBam: File = swapExt(sortedBam, ".bam", ".q.bam") val rgBam: File = swapExt(file, ".bam", ".rg.bam") val bamBase = if (USE_BWA) {rgBam} else {file} // BAM Steps + val mqBAM: File = swapExt(bamBase, ".bam", ".mq.bam") val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv") val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv") val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam") val path1: String = recalBam + ".before" val path2: String = recalBam + ".after" - if (USE_BWA) { add(align(file, alignedSam), sortSam(alignedSam, sortedBam), - addQuals(sortedBam, qualBam, dbq), - addReadGroup(qualBam, rgBam, sample)) + addReadGroup(sortedBam, rgBam, sample)) } - add(cov(bamBase, recalFile1), - recal(bamBase, recalFile1, recalBam), + else if (BLASR_BAM) { + val reorderedBAM = swapExt(bamBase, ".bam", ".reordered.bam") + add(reorder(bamBase, reorderedBAM), + changeMQ(reorderedBAM, mqBAM)) + } + + val bam = if (BLASR_BAM) {mqBAM} else {bamBase} + + add(cov(bam, recalFile1), + recal(bam, recalFile1, recalBam), cov(recalBam, recalFile2), analyzeCovariates(recalFile1, path1), analyzeCovariates(recalFile2, path2)) @@ -110,13 +117,13 @@ class RecalibrateBaseQualities extends QScript { case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs { - def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z10 -t8 " + reference + " " + inFastq + " > " + outSam + def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z20 -t16 " + reference + " " + inFastq + " > " + outSam + this.memoryLimit = 8 this.analysisName = queueLogDir + outSam + ".bwa_sam_se" this.jobName = queueLogDir + outSam + ".bwa_sam_se" } - case class sortSam (@Input inSam: File, @Output outBam: File) extends SortSam with ExternalCommonArgs { - @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") + case class sortSam (inSam: File, outBam: File) extends SortSam with ExternalCommonArgs { this.input = List(inSam) this.output = outBam this.sortOrder = SortOrder.coordinate @@ -124,10 +131,16 @@ class RecalibrateBaseQualities extends QScript { this.jobName = queueLogDir + outBam + ".sortSam" } - case class addQuals(inBam: File, outBam: File, qual: Int) extends PrintReads with CommandLineGATKArgs { + case class reorder (inSam: File, outSam: File) extends ReorderSam with ExternalCommonArgs { + this.input = List(inSam) + this.output = outSam + this.sortReference = reference + } + + case class changeMQ(inBam: File, outBam: File) extends PrintReads with CommandLineGATKArgs { this.input_file :+= inBam this.out = outBam - this.DBQ = qual + this.read_filter :+= "ReassignMappingQuality" } case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs { @@ -145,6 +158,7 @@ class RecalibrateBaseQualities extends QScript { } case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { + this.DBQ = dbq this.knownSites :+= dbSNP this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.input_file :+= inBam @@ -155,6 +169,7 @@ class RecalibrateBaseQualities extends QScript { } case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { + this.DBQ = dbq this.input_file :+= inBam this.recal_file = inRecalFile this.out = outBam diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/ReorderSam.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/ReorderSam.scala new file mode 100644 index 000000000..72489dc87 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/ReorderSam.scala @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline._ + +import java.io.File +/* + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 6/22/11 + * Time: 10:35 AM + */ +class ReorderSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "ReorderSam" + javaMainClass = "net.sf.picard.sam.ReorderSam" + + @Input(doc="Input file (bam or sam) to extract reads from.", shortName = "input", fullName = "input_bam_files", required = true) + var input: List[File] = Nil + + @Output(doc="Output file (bam or sam) to write extracted reads to.", shortName = "output", fullName = "output_bam_file", required = true) + var output: File = _ + + @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) + var outputIndex: File = _ + + @Argument(doc="Reference sequence to reorder reads to match.", shortName = "ref", fullName = "sort_reference", required = true) + var sortReference: File = _ + + @Argument(doc="If true, then allows only a partial overlap of the BAM contigs with the new reference sequence contigs. By default, this tool requires a corresponding contig in the new reference for each read contig.", shortName = "aic", fullName = "allow_incomplete_concordance", required = false) + var ALLOW_INCOMPLETE_DICT_CONCORDANCE: Boolean = _ + + @Argument(doc="If true, then permits mapping from a read contig to a new reference contig with the same name but a different length. Highly dangerous, only use if you know what you are doing.", shortName = "acld", fullName = "allow_contig_length_discordance", required = false) + var ALLOW_CONTIG_LENGTH_DISCORDANCE: Boolean = _ + + override def freezeFieldValues() { + super.freezeFieldValues() + if (outputIndex == null && output != null) + outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + override def inputBams = input + override def outputBam = output + this.createIndex = Some(true) + this.sortOrder = null + override def commandLine = super.commandLine + + " REFERENCE=" + sortReference + + optional(" ALLOW_INCOMPLETE_DICT_CONCORDANCE=", ALLOW_INCOMPLETE_DICT_CONCORDANCE) + optional(" ALLOW_CONTIG_LENGTH_DISCORDANCE=", ALLOW_CONTIG_LENGTH_DISCORDANCE) +} \ No newline at end of file