From 2c1b14d35e2db27d274a8bbe1336efd122034c2b Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Wed, 22 Feb 2012 17:20:04 -0500 Subject: [PATCH] Mostly small changes to my own scala scripts: .vcf.gz compatibility for output files, smarter beagle generation, simple script to scatter-gather combine variants. Whole genome indel calling now uses the gold standard indel set. --- .../gatk/walkers/variantutils/VariantsToPed.java | 10 ++++++++-- .../sting/queue/qscripts/lib/VcfToPed.scala | 14 +++++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java index aab230b69..d8b01e91d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToPed.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -45,6 +46,9 @@ public class VariantsToPed extends RodWalker { @Output(shortName="fam",fullName="fam",required=true,doc="output fam file") PrintStream outFam; + @Argument(shortName="mgq",fullName="minGenotypeQuality",required=true,doc="If genotype quality is lower than this value, output NO_CALL") + int minGenotypeQuality = 0; + private ValidateVariants vv = new ValidateVariants(); private static double APPROX_CM_PER_BP = 1000000.0/750000.0; @@ -173,9 +177,11 @@ public class VariantsToPed extends RodWalker { return 0; } - private static byte getEncoding(Genotype g, int offset) { + private byte getEncoding(Genotype g, int offset) { byte b; - if ( g.isHomRef() ) { + if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) { + b = NO_CALL; + } else if ( g.isHomRef() ) { b = HOM_REF; } else if ( g.isHomVar() ) { b = HOM_VAR; diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala index 913a62e26..cad8af51d 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/lib/VcfToPed.scala @@ -47,9 +47,14 @@ class VcfToPed extends QScript { val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false) add(extract) } else { + val IS_GZ : Boolean = variants.getName.endsWith(".vcf.gz") var iXRL = new XReadLines(intervals) var chunk = 1; - var subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) + var subListFile : File = null + if ( IS_GZ ) + subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk)) + else + subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) var subList = new PrintStream(subListFile) var nL = 0; var bedOuts : List[File] = Nil; @@ -58,7 +63,7 @@ class VcfToPed extends QScript { while ( iXRL.hasNext ) { subList.printf("%s%n",iXRL.next()) nL = nL + 1 - if ( nL > 100000 ) { + if ( nL > 10000 ) { val toPed : VariantsToPed = new VariantsToPed toPed.memoryLimit = 2 toPed.reference_sequence = ref @@ -89,7 +94,10 @@ class VcfToPed extends QScript { add(toPed) subList.close() chunk = chunk + 1 - subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) + if ( IS_GZ ) + subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk)) + else + subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk)) subList = new PrintStream(subListFile) bedOuts :+= tBed bimOuts :+= bim