diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java index aab230b69..d8b01e91d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -45,6 +46,9 @@ public class VariantsToPed extends RodWalker { @Output(shortName="fam",fullName="fam",required=true,doc="output fam file") PrintStream outFam; + @Argument(shortName="mgq",fullName="minGenotypeQuality",required=true,doc="If genotype quality is lower than this value, output NO_CALL") + int minGenotypeQuality = 0; + private ValidateVariants vv = new ValidateVariants(); private static double APPROX_CM_PER_BP = 1000000.0/750000.0; @@ -173,9 +177,11 @@ public class VariantsToPed extends RodWalker { return 0; } - private static byte getEncoding(Genotype g, int offset) { + private byte getEncoding(Genotype g, int offset) { byte b; - if ( g.isHomRef() ) { + if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) { + b = NO_CALL; + } else if ( g.isHomRef() ) { b = HOM_REF; } else if ( g.isHomVar() ) { b = HOM_VAR; diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/ChunkVCF.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/ChunkVCF.scala index 257fef021..0184b5d2c 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/ChunkVCF.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/ChunkVCF.scala @@ -23,7 +23,7 @@ class ChunkVCF extends QScript { @Input(shortName="N",fullName="numEntriesInChunk",doc="The number of variants per chunk",required=true) var numEntries : Int = _ - @Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.") + @Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.",required=false) var intervals : File = _ @Input(fullName="preserveChromosomes",doc="Restrict chunks to one chromosome (smaller chunk at end of chromosome)",required=false) @@ -40,8 +40,8 @@ class ChunkVCF extends QScript { def script = { if ( intervals == null ) { // create an interval list from the VCF - val ivals : File = swapExt(variants,".vcf",".intervals.list") - val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false) + val ivals : File = swapExt(inVCF,".vcf",".intervals.list") + val extract : VCFExtractIntervals = new VCFExtractIntervals(inVCF,ivals,false) add(extract) } else { var chunkNum = 1 @@ -54,11 +54,12 @@ class ChunkVCF extends QScript { if ( ( preserve && ! int.split(":")(0).equals(chromosome) ) || numLinesInChunk > numEntries ) { chunkWriter.close() val chunkSelect : SelectVariants = new SelectVariants + chunkSelect.variant = inVCF chunkSelect.reference_sequence = ref chunkSelect.memoryLimit = 2 chunkSelect.intervals :+= chunkFile if ( extractSamples != null ) - chunkSelect.sample_file = extractSamples + chunkSelect.sample_file :+= extractSamples chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum)) add(chunkSelect) chunkNum += 1 @@ -74,12 +75,13 @@ class ChunkVCF extends QScript { if ( numLinesInChunk > 0 ) { // some work to do val chunkSelect : SelectVariants = new SelectVariants + chunkSelect.variant = inVCF chunkSelect.reference_sequence = ref chunkSelect.memoryLimit = 2 chunkSelect.intervals :+= chunkFile chunkWriter.close() if ( extractSamples != null ) - chunkSelect.sample_file = extractSamples + chunkSelect.sample_file :+= extractSamples chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum)) add(chunkSelect) } diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala index 913a62e26..cad8af51d 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala @@ -47,9 +47,14 @@ class VcfToPed extends QScript { val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false) add(extract) } else { + val IS_GZ : Boolean = variants.getName.endsWith(".vcf.gz") var iXRL = new XReadLines(intervals) var chunk = 1; - var subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) + var subListFile : File = null + if ( IS_GZ ) + subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk)) + else + subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) var subList = new PrintStream(subListFile) var nL = 0; var bedOuts : List[File] = Nil; @@ -58,7 +63,7 @@ class VcfToPed extends QScript { while ( iXRL.hasNext ) { subList.printf("%s%n",iXRL.next()) nL = nL + 1 - if ( nL > 100000 ) { + if ( nL > 10000 ) { val toPed : VariantsToPed = new VariantsToPed toPed.memoryLimit = 2 toPed.reference_sequence = ref @@ -89,7 +94,10 @@ class VcfToPed extends QScript { add(toPed) subList.close() chunk = chunk + 1 - subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) + if ( IS_GZ ) + subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk)) + else + subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) subList = new PrintStream(subListFile) bedOuts :+= tBed bimOuts :+= bim