Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
2b6c0939ab
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -45,6 +46,9 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
|
||||||
@Output(shortName="fam",fullName="fam",required=true,doc="output fam file")
|
@Output(shortName="fam",fullName="fam",required=true,doc="output fam file")
|
||||||
PrintStream outFam;
|
PrintStream outFam;
|
||||||
|
|
||||||
|
@Argument(shortName="mgq",fullName="minGenotypeQuality",required=true,doc="If genotype quality is lower than this value, output NO_CALL")
|
||||||
|
int minGenotypeQuality = 0;
|
||||||
|
|
||||||
private ValidateVariants vv = new ValidateVariants();
|
private ValidateVariants vv = new ValidateVariants();
|
||||||
|
|
||||||
private static double APPROX_CM_PER_BP = 1000000.0/750000.0;
|
private static double APPROX_CM_PER_BP = 1000000.0/750000.0;
|
||||||
|
|
@ -173,9 +177,11 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static byte getEncoding(Genotype g, int offset) {
|
private byte getEncoding(Genotype g, int offset) {
|
||||||
byte b;
|
byte b;
|
||||||
if ( g.isHomRef() ) {
|
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
|
||||||
|
b = NO_CALL;
|
||||||
|
} else if ( g.isHomRef() ) {
|
||||||
b = HOM_REF;
|
b = HOM_REF;
|
||||||
} else if ( g.isHomVar() ) {
|
} else if ( g.isHomVar() ) {
|
||||||
b = HOM_VAR;
|
b = HOM_VAR;
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ class ChunkVCF extends QScript {
|
||||||
@Input(shortName="N",fullName="numEntriesInChunk",doc="The number of variants per chunk",required=true)
|
@Input(shortName="N",fullName="numEntriesInChunk",doc="The number of variants per chunk",required=true)
|
||||||
var numEntries : Int = _
|
var numEntries : Int = _
|
||||||
|
|
||||||
@Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.")
|
@Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.",required=false)
|
||||||
var intervals : File = _
|
var intervals : File = _
|
||||||
|
|
||||||
@Input(fullName="preserveChromosomes",doc="Restrict chunks to one chromosome (smaller chunk at end of chromosome)",required=false)
|
@Input(fullName="preserveChromosomes",doc="Restrict chunks to one chromosome (smaller chunk at end of chromosome)",required=false)
|
||||||
|
|
@ -40,8 +40,8 @@ class ChunkVCF extends QScript {
|
||||||
def script = {
|
def script = {
|
||||||
if ( intervals == null ) {
|
if ( intervals == null ) {
|
||||||
// create an interval list from the VCF
|
// create an interval list from the VCF
|
||||||
val ivals : File = swapExt(variants,".vcf",".intervals.list")
|
val ivals : File = swapExt(inVCF,".vcf",".intervals.list")
|
||||||
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
|
val extract : VCFExtractIntervals = new VCFExtractIntervals(inVCF,ivals,false)
|
||||||
add(extract)
|
add(extract)
|
||||||
} else {
|
} else {
|
||||||
var chunkNum = 1
|
var chunkNum = 1
|
||||||
|
|
@ -54,11 +54,12 @@ class ChunkVCF extends QScript {
|
||||||
if ( ( preserve && ! int.split(":")(0).equals(chromosome) ) || numLinesInChunk > numEntries ) {
|
if ( ( preserve && ! int.split(":")(0).equals(chromosome) ) || numLinesInChunk > numEntries ) {
|
||||||
chunkWriter.close()
|
chunkWriter.close()
|
||||||
val chunkSelect : SelectVariants = new SelectVariants
|
val chunkSelect : SelectVariants = new SelectVariants
|
||||||
|
chunkSelect.variant = inVCF
|
||||||
chunkSelect.reference_sequence = ref
|
chunkSelect.reference_sequence = ref
|
||||||
chunkSelect.memoryLimit = 2
|
chunkSelect.memoryLimit = 2
|
||||||
chunkSelect.intervals :+= chunkFile
|
chunkSelect.intervals :+= chunkFile
|
||||||
if ( extractSamples != null )
|
if ( extractSamples != null )
|
||||||
chunkSelect.sample_file = extractSamples
|
chunkSelect.sample_file :+= extractSamples
|
||||||
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
|
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
|
||||||
add(chunkSelect)
|
add(chunkSelect)
|
||||||
chunkNum += 1
|
chunkNum += 1
|
||||||
|
|
@ -74,12 +75,13 @@ class ChunkVCF extends QScript {
|
||||||
if ( numLinesInChunk > 0 ) {
|
if ( numLinesInChunk > 0 ) {
|
||||||
// some work to do
|
// some work to do
|
||||||
val chunkSelect : SelectVariants = new SelectVariants
|
val chunkSelect : SelectVariants = new SelectVariants
|
||||||
|
chunkSelect.variant = inVCF
|
||||||
chunkSelect.reference_sequence = ref
|
chunkSelect.reference_sequence = ref
|
||||||
chunkSelect.memoryLimit = 2
|
chunkSelect.memoryLimit = 2
|
||||||
chunkSelect.intervals :+= chunkFile
|
chunkSelect.intervals :+= chunkFile
|
||||||
chunkWriter.close()
|
chunkWriter.close()
|
||||||
if ( extractSamples != null )
|
if ( extractSamples != null )
|
||||||
chunkSelect.sample_file = extractSamples
|
chunkSelect.sample_file :+= extractSamples
|
||||||
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
|
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
|
||||||
add(chunkSelect)
|
add(chunkSelect)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,9 +47,14 @@ class VcfToPed extends QScript {
|
||||||
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
|
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
|
||||||
add(extract)
|
add(extract)
|
||||||
} else {
|
} else {
|
||||||
|
val IS_GZ : Boolean = variants.getName.endsWith(".vcf.gz")
|
||||||
var iXRL = new XReadLines(intervals)
|
var iXRL = new XReadLines(intervals)
|
||||||
var chunk = 1;
|
var chunk = 1;
|
||||||
var subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
var subListFile : File = null
|
||||||
|
if ( IS_GZ )
|
||||||
|
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
|
||||||
|
else
|
||||||
|
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||||
var subList = new PrintStream(subListFile)
|
var subList = new PrintStream(subListFile)
|
||||||
var nL = 0;
|
var nL = 0;
|
||||||
var bedOuts : List[File] = Nil;
|
var bedOuts : List[File] = Nil;
|
||||||
|
|
@ -58,7 +63,7 @@ class VcfToPed extends QScript {
|
||||||
while ( iXRL.hasNext ) {
|
while ( iXRL.hasNext ) {
|
||||||
subList.printf("%s%n",iXRL.next())
|
subList.printf("%s%n",iXRL.next())
|
||||||
nL = nL + 1
|
nL = nL + 1
|
||||||
if ( nL > 100000 ) {
|
if ( nL > 10000 ) {
|
||||||
val toPed : VariantsToPed = new VariantsToPed
|
val toPed : VariantsToPed = new VariantsToPed
|
||||||
toPed.memoryLimit = 2
|
toPed.memoryLimit = 2
|
||||||
toPed.reference_sequence = ref
|
toPed.reference_sequence = ref
|
||||||
|
|
@ -89,6 +94,9 @@ class VcfToPed extends QScript {
|
||||||
add(toPed)
|
add(toPed)
|
||||||
subList.close()
|
subList.close()
|
||||||
chunk = chunk + 1
|
chunk = chunk + 1
|
||||||
|
if ( IS_GZ )
|
||||||
|
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
|
||||||
|
else
|
||||||
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||||
subList = new PrintStream(subListFile)
|
subList = new PrintStream(subListFile)
|
||||||
bedOuts :+= tBed
|
bedOuts :+= tBed
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue