diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/SAMFileReader.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/SAMFileReader.java deleted file mode 100644 index f52a075ac..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/SAMFileReader.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; - -import java.io.*; -import java.util.ArrayList; -/** - * - * @author shermanjia - */ -public class SAMFileReader { - ArrayList ReadStrings = new ArrayList(); - ArrayList CigarStrings = new ArrayList(); - ArrayList ReadNames = new ArrayList(); - ArrayList ReadStartPositions = new ArrayList(); - ArrayList ReadStopPositions = new ArrayList(); - int minstartpos; - int maxstoppos; - - CigarParser formatter = new CigarParser(); - - public String[] GetReads(){ - return ReadStrings.toArray(new String[ReadStrings.size()]); - } - - public String[] GetReadNames(){ - return ReadNames.toArray(new String[ReadNames.size()]); - } - - public String[] GetCigarStrings(){ - return CigarStrings.toArray(new String[CigarStrings.size()]); - } - - public Integer[] GetStartPositions(){ - return ReadStartPositions.toArray(new Integer[ReadStartPositions.size()]); - } - - public Integer[] GetStopPositions(){ - return ReadStopPositions.toArray(new Integer[ReadStopPositions.size()]); - } - - public Integer GetMinStartPos(){ - return minstartpos; - } - - public Integer GetMaxStopPos(){ - return maxstoppos; - } - - public int GetReadIndex(String readname){ - if (ReadNames.contains(readname)){ - return ReadNames.indexOf(readname); - }else{ - return -1; - } - } - - public void ReadFile(String filename){ - try{ - FileInputStream fstream = new FileInputStream(filename); - DataInputStream in = new DataInputStream(fstream); - BufferedReader br = new BufferedReader(new InputStreamReader(in)); - String strLine; String [] s = null; - //Read File Line By Line - int i = 0; - while ((strLine = br.readLine()) != null) { - s = strLine.split("\\t"); - if (s.length>=10){ - //Parse the reads with cigar parser - String read = formatter.FormatRead(s[5],s[9]); - ReadStrings.add(read); - CigarStrings.add(s[5]); - ReadNames.add(s[0]); - ReadStartPositions.add(Integer.valueOf(s[3])); - ReadStopPositions.add(Integer.valueOf(s[3]) + read.length() - 1); - if (i == 0){ - minstartpos = Integer.valueOf(s[3]); - maxstoppos = Integer.valueOf(Integer.valueOf(s[3]) + read.length() - 1); - } - minstartpos = Math.min(minstartpos, Integer.valueOf(s[3])); - maxstoppos = Math.max(maxstoppos, Integer.valueOf(Integer.valueOf(s[3]) + read.length() - 1)); - i++; - } - } - in.close(); - }catch (Exception e){//Catch exception if any - System.err.println("SAMFileReader Error: " + e.getMessage()); - } - } -} - diff --git a/scala/qscript/oneoffs/depristo/CleaningTest.scala b/scala/qscript/oneoffs/depristo/CleaningTest.scala new file mode 100755 index 000000000..1b08d9ca5 --- /dev/null +++ b/scala/qscript/oneoffs/depristo/CleaningTest.scala @@ -0,0 +1,145 @@ +package oneoffs.depristo + +//import org.broadinstitute.sting.datasources.pipeline.Pipeline +import org.broadinstitute.sting.queue.extensions.gatk._ +import org.broadinstitute.sting.queue.QScript +import collection.JavaConversions._ +import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction +import org.broadinstitute.sting.queue.function.JarCommandLineFunction + + +class CleaningTest extends QScript { + qscript => + + @Input(doc="path to GATK jar", shortName="gatk", required=false) + var gatkJar: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKFromLaptop/trunk/dist/GenomeAnalysisTK.jar") + + @Input(doc="the chromosome to process", shortName="chr", required=false) + var chr: String = "20" + + @Input(doc="the chromosome to process", shortName="L", required=false) + var range: String = _ + + @Input(doc="output path", shortName="outputDir", required=false) + var outputDir: String = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/isizeConstrainedRealigner/" + + @Input(doc="base output filename", shortName="baseName", required=false) + var baseName: String = "" + + @Input(doc="path to tmp space for storing intermediate bam files", shortName="outputTmpDir", required=false) + var outputTmpDir: String = "/broad/shptmp/depristo/tmp" + + @Input(doc="path to Picard FixMateInformation.jar. See http://picard.sourceforge.net/ .", required=false) + var picardFixMatesJar: File = new java.io.File("/seq/software/picard/current/bin/FixMateInformation.jar") + var picardValidateJar: File = new java.io.File("/seq/software/picard/current/bin/ValidateSamFile.jar") + var picardSortSamJar: File = new java.io.File("/seq/software/picard/current/bin/SortSam.jar") + + private val tmpDir: File = new File("/broad/shptmp/depristo/tmp/") + private val reference: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta") + private val dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") + private val dindelEURCalls: String = "/humgen/1kg/DCC/ftp/technical/working/20110111_august_dindel_indel_calls/EUR.dindel_august_release.20110110.sites.vcf.gz" +// val chromosomeLength = List(249250621,243199373,198022430,191154276,180915260,171115067,159138663,146364022,141213431,135534747,135006516,133851895,115169878,107349540,102531392,90354753,81195210,78077248,59128983,63025520,48129895,51304566) + +// private var pipeline: Pipeline = _ + + trait CommandLineGATKArgs extends CommandLineGATK { + this.jarFile = qscript.gatkJar + this.reference_sequence = qscript.reference + this.memoryLimit = Some(4) + this.jobTempDir = qscript.tmpDir + } + + def script = { + val interval = qscript.chr + val bamList: File = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/isizeConstrainedRealigner/CEU.chr%s.list".format(qscript.chr)) + //val bamList: File = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/isizeConstrainedRealigner/FIN.chr%s.3samples.list".format(qscript.chr)) + val targetIntervals: File = new File("%s/chr_%s.intervals".format(outputDir, qscript.chr)) + + Console.println("interval " + interval) + + // 1.) Create cleaning targets + var target = new RealignerTargetCreator with CommandLineGATKArgs + target.input_file :+= bamList + target.intervalsString :+= interval + target.out = targetIntervals + target.mismatchFraction = Some(0.0) + target.rodBind :+= RodBind("dbsnp", "VCF", qscript.dbSNP) + target.rodBind :+= RodBind("indels3", "VCF", qscript.dindelEURCalls) + //target.jobName = baseName + ".target" + add(target) + + for ( cm <- List(true, false) ) { + // 2.) Clean without SW + var clean = new IndelRealigner with CommandLineGATKArgs + val cleanedBam = new File(outputDir + "cleaned.cm_%b.bam".format(cm)) + + clean.input_file :+= bamList + clean.intervalsString :+= interval + (if ( range != null ) ":" + range else "") + clean.targetIntervals = targetIntervals + clean.out = if ( cm ) cleanedBam else new File(cleanedBam + ".intermediate.bam") + clean.doNotUseSW = true + clean.constrainMovement = cm + clean.baq = Some(org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF) + clean.rodBind :+= RodBind("dbsnp", "VCF", qscript.dbSNP) + clean.rodBind :+= RodBind("indels3", "VCF", qscript.dindelEURCalls) + //clean.sortInCoordinateOrderEvenThoughItIsHighlyUnsafe = true + //clean.jobName = baseName + cm + ".clean" + + Console.println("CLEAN") + add(clean) + + if ( ! cm ) { + // Explicitly run fix mates if the function won't be scattered. + val fixMates = new PicardBamJarFunction { + // Declare inputs/outputs for dependency tracking. + @Input(doc="unfixed bam") var unfixed: File = _ + @Output(doc="fixed bam") var fixed: File = _ + def inputBams = List(unfixed) + def outputBam = fixed + } + + //fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId)) + fixMates.memoryLimit = Some(4) + fixMates.jarFile = qscript.picardFixMatesJar + fixMates.unfixed = clean.out + fixMates.fixed = cleanedBam + //fixMates.analysisName = "FixMates" + + // Add the fix mates explicitly + Console.println("fixMates") + add(fixMates) + } + + val validate = new JarCommandLineFunction { + // Declare inputs/outputs for dependency tracking. + @Input(doc="unfixed bam") var unfixed: File = _ + def inputBams = List(unfixed) + override def commandLine = super.commandLine + "%s%s%s IGNORE=INVALID_CIGAR IGNORE=MATE_NOT_FOUND".format( + optional(" VALIDATION_STRINGENCY=", "SILENT"), repeat(" INPUT=", inputBams), " TMP_DIR=" + jobTempDir) + } + + //fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId)) + validate.memoryLimit = Some(2) + validate.jarFile = qscript.picardValidateJar + validate.unfixed = cleanedBam + add(validate) + + val toQueryName = new PicardBamJarFunction { + // Declare inputs/outputs for dependency tracking. + @Input(doc="coordiante bam") var cobam: File = _ + @Output(doc="query bam") var qnbam: File = _ + def inputBams = List(cobam) + def outputBam = qnbam + } + + //fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId)) + toQueryName.memoryLimit = Some(4) + toQueryName.jarFile = qscript.picardSortSamJar + toQueryName.cobam = cleanedBam + toQueryName.qnbam = new File(cleanedBam.getAbsolutePath + ".qn.bam") + add(toQueryName) + + Console.println("loop done") + } + } +} diff --git a/scala/qscript/oneoffs/depristo/VQSRCutByNRS.scala b/scala/qscript/oneoffs/depristo/VQSRCutByNRS.scala index 185b54da2..0dfcc6b06 100755 --- a/scala/qscript/oneoffs/depristo/VQSRCutByNRS.scala +++ b/scala/qscript/oneoffs/depristo/VQSRCutByNRS.scala @@ -5,7 +5,7 @@ import org.broadinstitute.sting.queue.QScript import org.apache.commons.io.FilenameUtils; import scala.io.Source._ -class recalibrate extends QScript { +class VQSRCutByNRS extends QScript { // @Input(doc="bamIn", shortName="I", required=true) // var bamList: File = _