diff --git a/scala/qscript/playground/BatchMerge.q b/scala/qscript/playground/BatchMerge.q index 56867b0ed..2804e2df0 100755 --- a/scala/qscript/playground/BatchMerge.q +++ b/scala/qscript/playground/BatchMerge.q @@ -1,6 +1,7 @@ import java.io.{FileReader, BufferedReader} import org.broadinstitute.sting.commandline.Hidden import org.broadinstitute.sting.datasources.pipeline.Pipeline +import org.broadinstitute.sting.gatk.walkers.genotyper.{GenotypeLikelihoodsCalculationModel, UnifiedGenotyperEngine} import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.library.ipf.vcf.{VCFSimpleMerge, VCFExtractSites,VCFExtractIntervals} import org.broadinstitute.sting.queue.pipeline.{ProjectManagement, BamProcessing, VariantCalling} @@ -30,7 +31,7 @@ class batchMergePipeline extends QScript { var bams : List[File] = extractFileEntries(bamList) trait ExtractArgs extends VCFExtractSites { - this.keepFilters = true + this.keepFilters = false this.keepInfo = false this.keepQual = false } @@ -55,10 +56,11 @@ class batchMergePipeline extends QScript { } this.intervals :+= extractIntervals.listOut this.alleleVCF = combineVCFs.outVCF - this.output_all_callable_bases = true this.jarFile = new File(stingDir+"/dist/GenomeAnalysisTK.jar") this.memoryLimit = Some(4) this.scatterCount = 60 + this.output_mode = Some(UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES) + this.genotyping_mode = Some(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) } def newUGCL( bams: (List[File],Int) ) : UGCalcLikelihoods = { @@ -72,18 +74,33 @@ class batchMergePipeline extends QScript { addAll(calcs) trait CallVariantsArgs extends UGCallVariants { - this.output_all_callable_bases = true this.reference_sequence = batchMerge.ref this.intervals :+= extractIntervals.listOut this.jarFile = new File(stingDir+"/dist/GenomeAnalysisTK.jar") this.scatterCount = 30 this.memoryLimit = Some(8) + this.output_mode = Some(UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES) + this.genotyping_mode = Some(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) } var cVars : UGCallVariants = new UGCallVariants with CallVariantsArgs cVars.rodBind ++= calcs.map( a => new RodBind("variant"+a.out.getName.replace(".vcf",""),"vcf",a.out) ) cVars.out = batchOut add(cVars) + + trait CombineVariantsArgs extends CombineVariants { + this.reference_sequence = batchMerge.ref + this.intervals :+= extractIntervals.listOut + this.jarFile = new File(batchMerge.stingDir+"/dist/GenomeAnalysisTK.jar") + this.scatterCount = 10 + this.memoryLimit=Some(4) + } + + var combine : CombineVariants = new CombineVariants with CombineVariantsArgs + combine.out = swapExt(batchOut,".vcf",".variant.combined.vcf") + combine.rodBind ++= vcfs.map( u => new RodBind(u.getName,"vcf",u) ) + + add(combine) } def extractFileEntries(in: File): List[File] = { diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala index 57b84e552..8fd0cb62f 100755 --- a/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala +++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala @@ -68,7 +68,7 @@ class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ip def ok( loc : GenomeLoc, prev: GenomeLoc, next: GenomeLoc ) : Boolean = { //System.out.println("%s - %s - %s".format(repr(next),repr(loc),repr(previous))) - ( next == null || loc.distance(next) >= start) && (prev == null || loc.distance(prev) >= start) + ( next == null || loc.distance(next) >= start) && (prev == null || loc.distance(prev) >= start) && (!loc.equals(prev) && !loc.equals(next)) } def repr(loc : GenomeLoc) : String = { diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala index 398fc37fb..3935c2138 100755 --- a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala +++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFExtractIntervals.scala @@ -18,14 +18,42 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e def run = { out = new PrintWriter(new PrintStream(listOut)) - asScalaIterator(new XReadLines(vcfIn)).foreach(vcf2int) + var elems = asScalaIterator(new XReadLines(vcfIn)).map(vcf2int).filter(p => !p.equals("")) + var prev : String = null + if ( elems.hasNext ) { + prev = elems.next + } + var cur : String = null + if ( elems.hasNext ) { + cur = elems.next + } else { + out.printf("%s%n",prev) + } + while ( elems.hasNext ) { + out.printf("%s%n",prev) + while ( cur.equals(prev) && elems.hasNext && !cur.equals("") ) { + cur = elems.next + } + + if ( ! cur.equals(prev) ) { + if ( elems.hasNext ) { + prev = cur + cur = elems.next + } else { + out.printf("%s%n",cur) + } + } + } + out.close } - def vcf2int( vcfLine: String ) : Unit = { + def vcf2int( vcfLine: String ) : String = { var spline = vcfLine.split("\t") if ( ! vcfLine.startsWith("#") && (spline(6).equals("PASS") || spline(6).equals(".") || keepFilters) ) { - out.print("%s:%s%n".format(spline(0),spline(1))) + return("%s:%s".format(spline(0),spline(1))) + } else { + return "" } }