Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
d34f07dba0
|
|
@ -69,8 +69,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
|
||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
||||
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
|
||||
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
||||
final double isActiveProb = ( walker.presetActiveRegions == null ? 0.0 : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
||||
isActiveList.add( isActiveProb );
|
||||
if( firstIsActiveStart == null ) {
|
||||
firstIsActiveStart = fakeLoc;
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -141,62 +142,76 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
String indelString = p.getEventBases();
|
||||
if (p.isInsertion()) {
|
||||
boolean foundKey = false;
|
||||
// copy of hashmap into temp arrayList
|
||||
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
|
||||
for (String s : consensusIndelStrings.keySet()) {
|
||||
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
|
||||
}
|
||||
|
||||
if (read.getAlignmentEnd() == loc.getStart()) {
|
||||
// first corner condition: a read has an insertion at the end, and we're right at the insertion.
|
||||
// In this case, the read could have any of the inserted bases and we need to build a consensus
|
||||
for (String s : consensusIndelStrings.keySet()) {
|
||||
int cnt = consensusIndelStrings.get(s);
|
||||
|
||||
for (int k=0; k < cList.size(); k++) {
|
||||
String s = cList.get(k).getFirst();
|
||||
int cnt = cList.get(k).getSecond();
|
||||
// case 1: current insertion is prefix of indel in hash map
|
||||
if (s.startsWith(indelString)) {
|
||||
// case 1: current insertion is prefix of indel in hash map
|
||||
consensusIndelStrings.put(s, cnt + 1);
|
||||
cList.set(k,new Pair<String, Integer>(s,cnt+1));
|
||||
foundKey = true;
|
||||
break;
|
||||
} else if (indelString.startsWith(s)) {
|
||||
}
|
||||
else if (indelString.startsWith(s)) {
|
||||
// case 2: indel stored in hash table is prefix of current insertion
|
||||
// In this case, new bases are new key.
|
||||
consensusIndelStrings.remove(s);
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
foundKey = true;
|
||||
break;
|
||||
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
|
||||
}
|
||||
}
|
||||
if (!foundKey)
|
||||
// none of the above: event bases not supported by previous table, so add new key
|
||||
consensusIndelStrings.put(indelString, 1);
|
||||
cList.add(new Pair<String, Integer>(indelString,1));
|
||||
|
||||
} else if (read.getAlignmentStart() == loc.getStart() + 1) {
|
||||
}
|
||||
else if (read.getAlignmentStart() == loc.getStart()+1) {
|
||||
// opposite corner condition: read will start at current locus with an insertion
|
||||
for (String s : consensusIndelStrings.keySet()) {
|
||||
int cnt = consensusIndelStrings.get(s);
|
||||
for (int k=0; k < cList.size(); k++) {
|
||||
String s = cList.get(k).getFirst();
|
||||
int cnt = cList.get(k).getSecond();
|
||||
if (s.endsWith(indelString)) {
|
||||
// case 1: current insertion is suffix of indel in hash map
|
||||
consensusIndelStrings.put(s, cnt + 1);
|
||||
// case 1: current insertion (indelString) is suffix of indel in hash map (s)
|
||||
cList.set(k,new Pair<String, Integer>(s,cnt+1));
|
||||
foundKey = true;
|
||||
break;
|
||||
} else if (indelString.endsWith(s)) {
|
||||
// case 2: indel stored in hash table is suffix of current insertion
|
||||
}
|
||||
else if (indelString.endsWith(s)) {
|
||||
// case 2: indel stored in hash table is prefix of current insertion
|
||||
// In this case, new bases are new key.
|
||||
|
||||
consensusIndelStrings.remove(s);
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
foundKey = true;
|
||||
break;
|
||||
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
|
||||
}
|
||||
}
|
||||
if (!foundKey)
|
||||
// none of the above: event bases not supported by previous table, so add new key
|
||||
consensusIndelStrings.put(indelString, 1);
|
||||
cList.add(new Pair<String, Integer>(indelString,1));
|
||||
|
||||
} else {
|
||||
// normal case: insertion somewhere in the middle of a read: add count to hash map
|
||||
int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
|
||||
}
|
||||
else {
|
||||
// normal case: insertion somewhere in the middle of a read: add count to arrayList
|
||||
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
|
||||
cList.add(new Pair<String, Integer>(indelString,cnt+1));
|
||||
}
|
||||
|
||||
} else if (p.isDeletion()) {
|
||||
indelString = String.format("D%d", p.getEventLength());
|
||||
int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
|
||||
consensusIndelStrings.put(indelString, cnt + 1);
|
||||
// copy back arrayList into hashMap
|
||||
consensusIndelStrings.clear();
|
||||
for (Pair<String,Integer> pair : cList) {
|
||||
consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
|
||||
}
|
||||
|
||||
}
|
||||
else if (p.isDeletion()) {
|
||||
indelString = String.format("D%d",p.getEventLength());
|
||||
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
|
||||
consensusIndelStrings.put(indelString,cnt+1);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -45,6 +46,9 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
|
|||
@Output(shortName="fam",fullName="fam",required=true,doc="output fam file")
|
||||
PrintStream outFam;
|
||||
|
||||
@Argument(shortName="mgq",fullName="minGenotypeQuality",required=true,doc="If genotype quality is lower than this value, output NO_CALL")
|
||||
int minGenotypeQuality = 0;
|
||||
|
||||
private ValidateVariants vv = new ValidateVariants();
|
||||
|
||||
private static double APPROX_CM_PER_BP = 1000000.0/750000.0;
|
||||
|
|
@ -173,9 +177,11 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
|
|||
return 0;
|
||||
}
|
||||
|
||||
private static byte getEncoding(Genotype g, int offset) {
|
||||
private byte getEncoding(Genotype g, int offset) {
|
||||
byte b;
|
||||
if ( g.isHomRef() ) {
|
||||
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
|
||||
b = NO_CALL;
|
||||
} else if ( g.isHomRef() ) {
|
||||
b = HOM_REF;
|
||||
} else if ( g.isHomVar() ) {
|
||||
b = HOM_VAR;
|
||||
|
|
|
|||
|
|
@ -24,11 +24,15 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -109,8 +113,52 @@ public class Haplotype {
|
|||
return isReference;
|
||||
}
|
||||
|
||||
public byte[] insertAllele( final Allele a ) {
|
||||
return getBases();
|
||||
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
|
||||
final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
|
||||
|
||||
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
||||
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
|
||||
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
return getBases().clone();
|
||||
}
|
||||
haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
|
||||
final byte[] newHaplotype = getBases().clone();
|
||||
|
||||
try {
|
||||
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
|
||||
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||
final int altAlleleLength = altAllele.length();
|
||||
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
|
||||
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
||||
}
|
||||
for( int iii = 0; iii < altAlleleLength; iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
} else { // deletion
|
||||
int refHaplotypeOffset = 0;
|
||||
for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
|
||||
if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
|
||||
else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
|
||||
}
|
||||
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
final int shift = refAllele.length() - altAllele.length();
|
||||
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
|
||||
newHaplotype[iii] = newHaplotype[iii+shift];
|
||||
}
|
||||
for( int iii = 0; iii < shift; iii++ ) {
|
||||
newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
|
||||
}
|
||||
}
|
||||
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
|
||||
return getBases().clone();
|
||||
}
|
||||
|
||||
return newHaplotype;
|
||||
}
|
||||
|
||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
||||
|
|
@ -169,4 +217,84 @@ public class Haplotype {
|
|||
return haplotypeMap;
|
||||
}
|
||||
|
||||
private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) {
|
||||
int readBases = 0;
|
||||
int refBases = 0;
|
||||
boolean fallsInsideDeletion = false;
|
||||
|
||||
int goal = refCoord - haplotypeStart; // The goal is to move this many reference bases
|
||||
boolean goalReached = refBases == goal;
|
||||
|
||||
Iterator<CigarElement> cigarElementIterator = haplotypeCigar.getCigarElements().iterator();
|
||||
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||
CigarElement cigarElement = cigarElementIterator.next();
|
||||
int shift = 0;
|
||||
|
||||
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
|
||||
if (refBases + cigarElement.getLength() < goal)
|
||||
shift = cigarElement.getLength();
|
||||
else
|
||||
shift = goal - refBases;
|
||||
|
||||
refBases += shift;
|
||||
}
|
||||
goalReached = refBases == goal;
|
||||
|
||||
if (!goalReached && cigarElement.getOperator().consumesReadBases())
|
||||
readBases += cigarElement.getLength();
|
||||
|
||||
if (goalReached) {
|
||||
// Is this base's reference position within this cigar element? Or did we use it all?
|
||||
boolean endsWithinCigar = shift < cigarElement.getLength();
|
||||
|
||||
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
||||
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
||||
if (!endsWithinCigar && !cigarElementIterator.hasNext())
|
||||
return -1;
|
||||
|
||||
CigarElement nextCigarElement;
|
||||
|
||||
// if we end inside the current cigar element, we just have to check if it is a deletion
|
||||
if (endsWithinCigar)
|
||||
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
||||
|
||||
// if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
|
||||
else {
|
||||
nextCigarElement = cigarElementIterator.next();
|
||||
|
||||
// if it's an insertion, we need to clip the whole insertion before looking at the next element
|
||||
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||
readBases += nextCigarElement.getLength();
|
||||
if (!cigarElementIterator.hasNext())
|
||||
return -1;
|
||||
|
||||
nextCigarElement = cigarElementIterator.next();
|
||||
}
|
||||
|
||||
// if it's a deletion, we will pass the information on to be handled downstream.
|
||||
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
|
||||
}
|
||||
|
||||
// If we reached our goal outside a deletion, add the shift
|
||||
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
||||
readBases += shift;
|
||||
|
||||
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
||||
// to add the shift of the current cigar element but go back to it's last element to return the last
|
||||
// base before the deletion (see warning in function contracts)
|
||||
else if (fallsInsideDeletion && !endsWithinCigar)
|
||||
readBases += shift - 1;
|
||||
|
||||
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
|
||||
else if (fallsInsideDeletion && endsWithinCigar)
|
||||
readBases--;
|
||||
}
|
||||
}
|
||||
|
||||
if (!goalReached)
|
||||
return -1;
|
||||
|
||||
return (fallsInsideDeletion ? -1 : readBases);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1613,4 +1613,36 @@ public class MathUtils {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an integer out of a bitset
|
||||
*
|
||||
* @param bitSet the bitset
|
||||
* @return an integer with the bitset representation
|
||||
*/
|
||||
public static int intFrom(final BitSet bitSet) {
|
||||
int integer = 0;
|
||||
for (int bitIndex = bitSet.nextSetBit(0); bitIndex >= 0; bitIndex = bitSet.nextSetBit(bitIndex+1))
|
||||
integer |= 1 << bitIndex;
|
||||
|
||||
return integer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of a given integer
|
||||
*
|
||||
* @param integer the number to turn into a bitset
|
||||
* @return a bitset representation of the integer
|
||||
*/
|
||||
public static BitSet bitSetFrom(int integer) {
|
||||
BitSet bitSet = new BitSet((int) Math.ceil(Math.sqrt(integer)));
|
||||
int bitIndex = 0;
|
||||
while (integer > 0) {
|
||||
if (integer%2 > 0)
|
||||
bitSet.set(bitIndex);
|
||||
bitIndex++;
|
||||
integer /= 2;
|
||||
}
|
||||
return bitSet;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -190,11 +190,12 @@ public class BaseRecalibration {
|
|||
final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
|
||||
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
|
||||
|
||||
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
|
||||
if( qualityScore == null ) {
|
||||
qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
|
||||
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
|
||||
}
|
||||
// BUGBUG: This caching seems to put the entire key set into memory which negates the benefits of storing the delta delta tables?
|
||||
//Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
|
||||
//if( qualityScore == null ) {
|
||||
final byte qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
|
||||
// qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
|
||||
//}
|
||||
|
||||
recalQuals[offset] = qualityScore;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -205,6 +205,16 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testIntAndBitSetConversion() {
|
||||
Assert.assertEquals(428, MathUtils.intFrom(MathUtils.bitSetFrom(428)));
|
||||
Assert.assertEquals(239847, MathUtils.intFrom(MathUtils.bitSetFrom(239847)));
|
||||
Assert.assertEquals(12726, MathUtils.intFrom(MathUtils.bitSetFrom(12726)));
|
||||
Assert.assertEquals(0, MathUtils.intFrom(MathUtils.bitSetFrom(0)));
|
||||
Assert.assertEquals(1, MathUtils.intFrom(MathUtils.bitSetFrom(1)));
|
||||
Assert.assertEquals(65536, MathUtils.intFrom(MathUtils.bitSetFrom(65536)));
|
||||
}
|
||||
|
||||
private boolean hasUniqueElements(Object[] x) {
|
||||
for (int i = 0; i < x.length; i++)
|
||||
for (int j = i + 1; j < x.length; j++)
|
||||
|
|
@ -220,10 +230,10 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
return set.isEmpty();
|
||||
}
|
||||
|
||||
|
||||
private void p (Object []x) {
|
||||
for (Object v: x)
|
||||
System.out.print((Integer) v + " ");
|
||||
System.out.println();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ class ChunkVCF extends QScript {
|
|||
@Input(shortName="N",fullName="numEntriesInChunk",doc="The number of variants per chunk",required=true)
|
||||
var numEntries : Int = _
|
||||
|
||||
@Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.")
|
||||
@Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.",required=false)
|
||||
var intervals : File = _
|
||||
|
||||
@Input(fullName="preserveChromosomes",doc="Restrict chunks to one chromosome (smaller chunk at end of chromosome)",required=false)
|
||||
|
|
@ -40,8 +40,8 @@ class ChunkVCF extends QScript {
|
|||
def script = {
|
||||
if ( intervals == null ) {
|
||||
// create an interval list from the VCF
|
||||
val ivals : File = swapExt(variants,".vcf",".intervals.list")
|
||||
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
|
||||
val ivals : File = swapExt(inVCF,".vcf",".intervals.list")
|
||||
val extract : VCFExtractIntervals = new VCFExtractIntervals(inVCF,ivals,false)
|
||||
add(extract)
|
||||
} else {
|
||||
var chunkNum = 1
|
||||
|
|
@ -54,11 +54,12 @@ class ChunkVCF extends QScript {
|
|||
if ( ( preserve && ! int.split(":")(0).equals(chromosome) ) || numLinesInChunk > numEntries ) {
|
||||
chunkWriter.close()
|
||||
val chunkSelect : SelectVariants = new SelectVariants
|
||||
chunkSelect.variant = inVCF
|
||||
chunkSelect.reference_sequence = ref
|
||||
chunkSelect.memoryLimit = 2
|
||||
chunkSelect.intervals :+= chunkFile
|
||||
if ( extractSamples != null )
|
||||
chunkSelect.sample_file = extractSamples
|
||||
chunkSelect.sample_file :+= extractSamples
|
||||
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
|
||||
add(chunkSelect)
|
||||
chunkNum += 1
|
||||
|
|
@ -74,12 +75,13 @@ class ChunkVCF extends QScript {
|
|||
if ( numLinesInChunk > 0 ) {
|
||||
// some work to do
|
||||
val chunkSelect : SelectVariants = new SelectVariants
|
||||
chunkSelect.variant = inVCF
|
||||
chunkSelect.reference_sequence = ref
|
||||
chunkSelect.memoryLimit = 2
|
||||
chunkSelect.intervals :+= chunkFile
|
||||
chunkWriter.close()
|
||||
if ( extractSamples != null )
|
||||
chunkSelect.sample_file = extractSamples
|
||||
chunkSelect.sample_file :+= extractSamples
|
||||
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
|
||||
add(chunkSelect)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,9 +47,14 @@ class VcfToPed extends QScript {
|
|||
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
|
||||
add(extract)
|
||||
} else {
|
||||
val IS_GZ : Boolean = variants.getName.endsWith(".vcf.gz")
|
||||
var iXRL = new XReadLines(intervals)
|
||||
var chunk = 1;
|
||||
var subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
var subListFile : File = null
|
||||
if ( IS_GZ )
|
||||
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
|
||||
else
|
||||
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
var subList = new PrintStream(subListFile)
|
||||
var nL = 0;
|
||||
var bedOuts : List[File] = Nil;
|
||||
|
|
@ -58,7 +63,7 @@ class VcfToPed extends QScript {
|
|||
while ( iXRL.hasNext ) {
|
||||
subList.printf("%s%n",iXRL.next())
|
||||
nL = nL + 1
|
||||
if ( nL > 100000 ) {
|
||||
if ( nL > 10000 ) {
|
||||
val toPed : VariantsToPed = new VariantsToPed
|
||||
toPed.memoryLimit = 2
|
||||
toPed.reference_sequence = ref
|
||||
|
|
@ -89,7 +94,10 @@ class VcfToPed extends QScript {
|
|||
add(toPed)
|
||||
subList.close()
|
||||
chunk = chunk + 1
|
||||
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
if ( IS_GZ )
|
||||
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
|
||||
else
|
||||
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
|
||||
subList = new PrintStream(subListFile)
|
||||
bedOuts :+= tBed
|
||||
bimOuts :+= bim
|
||||
|
|
|
|||
Loading…
Reference in New Issue