BatchMerge now uses the correct UG settings, recently added by Eric
ExpandIntervals now checks that identical intervals are not created by (un)fortunately-spaced targets VCFExtractIntervals no longer creates duplicate intervals in the case where a VCF has multiple entries at the same site git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5294 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9357bee921
commit
07d381ec51
|
|
@ -1,6 +1,7 @@
|
|||
import java.io.{FileReader, BufferedReader}
|
||||
import org.broadinstitute.sting.commandline.Hidden
|
||||
import org.broadinstitute.sting.datasources.pipeline.Pipeline
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.{GenotypeLikelihoodsCalculationModel, UnifiedGenotyperEngine}
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.library.ipf.vcf.{VCFSimpleMerge, VCFExtractSites,VCFExtractIntervals}
|
||||
import org.broadinstitute.sting.queue.pipeline.{ProjectManagement, BamProcessing, VariantCalling}
|
||||
|
|
@ -30,7 +31,7 @@ class batchMergePipeline extends QScript {
|
|||
var bams : List[File] = extractFileEntries(bamList)
|
||||
|
||||
trait ExtractArgs extends VCFExtractSites {
|
||||
this.keepFilters = true
|
||||
this.keepFilters = false
|
||||
this.keepInfo = false
|
||||
this.keepQual = false
|
||||
}
|
||||
|
|
@ -55,10 +56,11 @@ class batchMergePipeline extends QScript {
|
|||
}
|
||||
this.intervals :+= extractIntervals.listOut
|
||||
this.alleleVCF = combineVCFs.outVCF
|
||||
this.output_all_callable_bases = true
|
||||
this.jarFile = new File(stingDir+"/dist/GenomeAnalysisTK.jar")
|
||||
this.memoryLimit = Some(4)
|
||||
this.scatterCount = 60
|
||||
this.output_mode = Some(UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES)
|
||||
this.genotyping_mode = Some(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES)
|
||||
}
|
||||
|
||||
def newUGCL( bams: (List[File],Int) ) : UGCalcLikelihoods = {
|
||||
|
|
@ -72,18 +74,33 @@ class batchMergePipeline extends QScript {
|
|||
addAll(calcs)
|
||||
|
||||
trait CallVariantsArgs extends UGCallVariants {
|
||||
this.output_all_callable_bases = true
|
||||
this.reference_sequence = batchMerge.ref
|
||||
this.intervals :+= extractIntervals.listOut
|
||||
this.jarFile = new File(stingDir+"/dist/GenomeAnalysisTK.jar")
|
||||
this.scatterCount = 30
|
||||
this.memoryLimit = Some(8)
|
||||
this.output_mode = Some(UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES)
|
||||
this.genotyping_mode = Some(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES)
|
||||
}
|
||||
|
||||
var cVars : UGCallVariants = new UGCallVariants with CallVariantsArgs
|
||||
cVars.rodBind ++= calcs.map( a => new RodBind("variant"+a.out.getName.replace(".vcf",""),"vcf",a.out) )
|
||||
cVars.out = batchOut
|
||||
add(cVars)
|
||||
|
||||
trait CombineVariantsArgs extends CombineVariants {
|
||||
this.reference_sequence = batchMerge.ref
|
||||
this.intervals :+= extractIntervals.listOut
|
||||
this.jarFile = new File(batchMerge.stingDir+"/dist/GenomeAnalysisTK.jar")
|
||||
this.scatterCount = 10
|
||||
this.memoryLimit=Some(4)
|
||||
}
|
||||
|
||||
var combine : CombineVariants = new CombineVariants with CombineVariantsArgs
|
||||
combine.out = swapExt(batchOut,".vcf",".variant.combined.vcf")
|
||||
combine.rodBind ++= vcfs.map( u => new RodBind(u.getName,"vcf",u) )
|
||||
|
||||
add(combine)
|
||||
}
|
||||
|
||||
def extractFileEntries(in: File): List[File] = {
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ip
|
|||
|
||||
def ok( loc : GenomeLoc, prev: GenomeLoc, next: GenomeLoc ) : Boolean = {
|
||||
//System.out.println("%s - %s - %s".format(repr(next),repr(loc),repr(previous)))
|
||||
( next == null || loc.distance(next) >= start) && (prev == null || loc.distance(prev) >= start)
|
||||
( next == null || loc.distance(next) >= start) && (prev == null || loc.distance(prev) >= start) && (!loc.equals(prev) && !loc.equals(next))
|
||||
}
|
||||
|
||||
def repr(loc : GenomeLoc) : String = {
|
||||
|
|
|
|||
|
|
@ -18,14 +18,42 @@ class VCFExtractIntervals(inVCF: File, outList: File, useFilterSites: Boolean) e
|
|||
|
||||
def run = {
|
||||
out = new PrintWriter(new PrintStream(listOut))
|
||||
asScalaIterator(new XReadLines(vcfIn)).foreach(vcf2int)
|
||||
var elems = asScalaIterator(new XReadLines(vcfIn)).map(vcf2int).filter(p => !p.equals(""))
|
||||
var prev : String = null
|
||||
if ( elems.hasNext ) {
|
||||
prev = elems.next
|
||||
}
|
||||
var cur : String = null
|
||||
if ( elems.hasNext ) {
|
||||
cur = elems.next
|
||||
} else {
|
||||
out.printf("%s%n",prev)
|
||||
}
|
||||
while ( elems.hasNext ) {
|
||||
out.printf("%s%n",prev)
|
||||
while ( cur.equals(prev) && elems.hasNext && !cur.equals("") ) {
|
||||
cur = elems.next
|
||||
}
|
||||
|
||||
if ( ! cur.equals(prev) ) {
|
||||
if ( elems.hasNext ) {
|
||||
prev = cur
|
||||
cur = elems.next
|
||||
} else {
|
||||
out.printf("%s%n",cur)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out.close
|
||||
}
|
||||
|
||||
def vcf2int( vcfLine: String ) : Unit = {
|
||||
def vcf2int( vcfLine: String ) : String = {
|
||||
var spline = vcfLine.split("\t")
|
||||
if ( ! vcfLine.startsWith("#") && (spline(6).equals("PASS") || spline(6).equals(".") || keepFilters) ) {
|
||||
out.print("%s:%s%n".format(spline(0),spline(1)))
|
||||
return("%s:%s".format(spline(0),spline(1)))
|
||||
} else {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue