Mostly small changes to my own scala scripts: .vcf.gz compatibility for output files, smarter beagle generation, simple script to scatter-gather combine variants. Whole genome indel calling now uses the gold standard indel set.
This commit is contained in:
parent
9b61a398b3
commit
2c1b14d35e
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -45,6 +46,9 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
|
|||
@Output(shortName="fam",fullName="fam",required=true,doc="output fam file")
|
||||
PrintStream outFam;
|
||||
|
||||
@Argument(shortName="mgq",fullName="minGenotypeQuality",required=true,doc="If genotype quality is lower than this value, output NO_CALL")
|
||||
int minGenotypeQuality = 0;
|
||||
|
||||
private ValidateVariants vv = new ValidateVariants();
|
||||
|
||||
private static double APPROX_CM_PER_BP = 1000000.0/750000.0;
|
||||
|
|
@ -173,9 +177,11 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
|
|||
return 0;
|
||||
}
|
||||
|
||||
private static byte getEncoding(Genotype g, int offset) {
|
||||
private byte getEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.isHomRef() ) {
|
||||
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_REF;
|
||||
} else if ( g.isHomVar() ) {
|
||||
b = HOM_VAR;
|
||||
|
|
|
|||
|
|
@ -47,9 +47,14 @@ class VcfToPed extends QScript {
|
|||
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
|
||||
add(extract)
|
||||
} else {
|
||||
val IS_GZ : Boolean = variants.getName.endsWith(".vcf.gz")
|
||||
var iXRL = new XReadLines(intervals)
|
||||
var chunk = 1;
|
||||
var subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
var subListFile : File = null
|
||||
if ( IS_GZ )
|
||||
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
|
||||
else
|
||||
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
var subList = new PrintStream(subListFile)
|
||||
var nL = 0;
|
||||
var bedOuts : List[File] = Nil;
|
||||
|
|
@ -58,7 +63,7 @@ class VcfToPed extends QScript {
|
|||
while ( iXRL.hasNext ) {
|
||||
subList.printf("%s%n",iXRL.next())
|
||||
nL = nL + 1
|
||||
if ( nL > 100000 ) {
|
||||
if ( nL > 10000 ) {
|
||||
val toPed : VariantsToPed = new VariantsToPed
|
||||
toPed.memoryLimit = 2
|
||||
toPed.reference_sequence = ref
|
||||
|
|
@ -89,7 +94,10 @@ class VcfToPed extends QScript {
|
|||
add(toPed)
|
||||
subList.close()
|
||||
chunk = chunk + 1
|
||||
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
if ( IS_GZ )
|
||||
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
|
||||
else
|
||||
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
subList = new PrintStream(subListFile)
|
||||
bedOuts :+= tBed
|
||||
bimOuts :+= bim
|
||||
|
|
|
|||
Loading…
Reference in New Issue