Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2012-02-22 20:41:03 -05:00
commit d34f07dba0
9 changed files with 252 additions and 51 deletions

View File

@ -69,8 +69,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
final double isActiveProb = ( walker.presetActiveRegions == null ? 0.0 : ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
isActiveList.add( isActiveProb );
if( firstIsActiveStart == null ) {
firstIsActiveStart = fakeLoc;

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -141,62 +142,76 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
String indelString = p.getEventBases();
if (p.isInsertion()) {
boolean foundKey = false;
// copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
for (String s : consensusIndelStrings.keySet()) {
cList.add(new Pair<String, Integer>(s,consensusIndelStrings.get(s)));
}
if (read.getAlignmentEnd() == loc.getStart()) {
// first corner condition: a read has an insertion at the end, and we're right at the insertion.
// In this case, the read could have any of the inserted bases and we need to build a consensus
for (String s : consensusIndelStrings.keySet()) {
int cnt = consensusIndelStrings.get(s);
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
// case 1: current insertion is prefix of indel in hash map
if (s.startsWith(indelString)) {
// case 1: current insertion is prefix of indel in hash map
consensusIndelStrings.put(s, cnt + 1);
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
break;
} else if (indelString.startsWith(s)) {
}
else if (indelString.startsWith(s)) {
// case 2: indel stored in hash table is prefix of current insertion
// In this case, new bases are new key.
consensusIndelStrings.remove(s);
consensusIndelStrings.put(indelString, cnt + 1);
foundKey = true;
break;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
consensusIndelStrings.put(indelString, 1);
cList.add(new Pair<String, Integer>(indelString,1));
} else if (read.getAlignmentStart() == loc.getStart() + 1) {
}
else if (read.getAlignmentStart() == loc.getStart()+1) {
// opposite corner condition: read will start at current locus with an insertion
for (String s : consensusIndelStrings.keySet()) {
int cnt = consensusIndelStrings.get(s);
for (int k=0; k < cList.size(); k++) {
String s = cList.get(k).getFirst();
int cnt = cList.get(k).getSecond();
if (s.endsWith(indelString)) {
// case 1: current insertion is suffix of indel in hash map
consensusIndelStrings.put(s, cnt + 1);
// case 1: current insertion (indelString) is suffix of indel in hash map (s)
cList.set(k,new Pair<String, Integer>(s,cnt+1));
foundKey = true;
break;
} else if (indelString.endsWith(s)) {
// case 2: indel stored in hash table is suffix of current insertion
}
else if (indelString.endsWith(s)) {
// case 2: indel stored in hash table is suffix of current insertion
// In this case, new bases are new key.
consensusIndelStrings.remove(s);
consensusIndelStrings.put(indelString, cnt + 1);
foundKey = true;
break;
cList.set(k,new Pair<String, Integer>(indelString,cnt+1));
}
}
if (!foundKey)
// none of the above: event bases not supported by previous table, so add new key
consensusIndelStrings.put(indelString, 1);
cList.add(new Pair<String, Integer>(indelString,1));
} else {
// normal case: insertion somewhere in the middle of a read: add count to hash map
int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
consensusIndelStrings.put(indelString, cnt + 1);
}
else {
// normal case: insertion somewhere in the middle of a read: add count to arrayList
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
cList.add(new Pair<String, Integer>(indelString,cnt+1));
}
} else if (p.isDeletion()) {
indelString = String.format("D%d", p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString) ? consensusIndelStrings.get(indelString) : 0;
consensusIndelStrings.put(indelString, cnt + 1);
// copy back arrayList into hashMap
consensusIndelStrings.clear();
for (Pair<String,Integer> pair : cList) {
consensusIndelStrings.put(pair.getFirst(),pair.getSecond());
}
}
else if (p.isDeletion()) {
indelString = String.format("D%d",p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
consensusIndelStrings.put(indelString,cnt+1);
}
}

View File

@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -45,6 +46,9 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
@Output(shortName="fam",fullName="fam",required=true,doc="output fam file")
PrintStream outFam;
@Argument(shortName="mgq",fullName="minGenotypeQuality",required=true,doc="If genotype quality is lower than this value, output NO_CALL")
int minGenotypeQuality = 0;
private ValidateVariants vv = new ValidateVariants();
private static double APPROX_CM_PER_BP = 1000000.0/750000.0;
@ -173,9 +177,11 @@ public class VariantsToPed extends RodWalker<Integer,Integer> {
return 0;
}
private static byte getEncoding(Genotype g, int offset) {
private byte getEncoding(Genotype g, int offset) {
byte b;
if ( g.isHomRef() ) {
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
b = NO_CALL;
} else if ( g.isHomRef() ) {
b = HOM_REF;
} else if ( g.isHomVar() ) {
b = HOM_VAR;

View File

@ -24,11 +24,15 @@
package org.broadinstitute.sting.utils;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
@ -109,8 +113,52 @@ public class Haplotype {
return isReference;
}
public byte[] insertAllele( final Allele a ) {
return getBases();
/**
 * Builds a new haplotype byte array with the given alt allele spliced in at the requested
 * reference coordinate.
 *
 * The reference coordinate is first translated into a haplotype coordinate via the haplotype's
 * cigar; if the target position falls inside a deletion (translation returns -1), or if the
 * splice cannot be applied (array bounds exceeded because of insufficient reference padding),
 * an unmodified copy of this haplotype's bases is returned instead.
 *
 * @param refAllele                        the reference allele at the event site
 * @param altAllele                        the alternate allele to insert
 * @param refInsertLocation                reference coordinate of the event (bumped by one for indels,
 *                                         since indel alleles carry a leading padding base)
 * @param paddedRef                        padded reference bases used to back-fill the tail after a deletion
 * @param refStart                         offset of this haplotype's start within paddedRef
 * @param haplotypeCigar                   cigar describing this haplotype relative to the reference
 * @param numBasesAddedToStartOfHaplotype  extra leading bases prepended to the haplotype
 * @param refHaplotypeLength               length of the haplotype span on the reference
 * @return a new byte array with the allele inserted, or a copy of the original bases on failure
 */
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final byte[] paddedRef, final int refStart,
final Cigar haplotypeCigar, final int numBasesAddedToStartOfHaplotype, final int refHaplotypeLength ) {
// indel alleles include a padding base, so the actual changed bases start one position later
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(refStart + numBasesAddedToStartOfHaplotype, haplotypeCigar, refInsertLocation);
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
return getBases().clone();
}
haplotypeInsertLocation += numBasesAddedToStartOfHaplotype;
final byte[] newHaplotype = getBases().clone();
try {
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
// same-length event: overwrite bases in place
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else if( refAllele.length() < altAllele.length() ) { // insertion
// shift the tail right to make room, then write the inserted bases
// (note: bases shifted off the end of the fixed-length array are dropped)
final int altAlleleLength = altAllele.length();
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
}
for( int iii = 0; iii < altAlleleLength; iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else { // deletion
// net reference-vs-haplotype length difference from events already on this haplotype,
// needed to locate the correct back-fill bases in the padded reference
int refHaplotypeOffset = 0;
for( final CigarElement ce : haplotypeCigar.getCigarElements()) {
if(ce.getOperator() == CigarOperator.D) { refHaplotypeOffset += ce.getLength(); }
else if(ce.getOperator() == CigarOperator.I) { refHaplotypeOffset -= ce.getLength(); }
}
// write the retained (alt) bases at the event site
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
// pull the tail left over the deleted span
final int shift = refAllele.length() - altAllele.length();
for( int iii = haplotypeInsertLocation + altAllele.length(); iii < newHaplotype.length - shift; iii++ ) {
newHaplotype[iii] = newHaplotype[iii+shift];
}
// back-fill the vacated tail from the padded reference
for( int iii = 0; iii < shift; iii++ ) {
newHaplotype[iii+newHaplotype.length-shift] = paddedRef[refStart+refHaplotypeLength+refHaplotypeOffset+iii];
}
}
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
return getBases().clone();
}
return newHaplotype;
}
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
@ -169,4 +217,84 @@ public class Haplotype {
return haplotypeMap;
}
/**
 * Translates a reference coordinate into a coordinate on the haplotype's base array by walking
 * the haplotype's cigar.
 *
 * Walks cigar elements, consuming reference bases (M/D/S and other reference-consuming
 * operators) until the target reference offset is reached, while tallying the haplotype
 * ("read") bases consumed along the way. If the target position lands inside a deletion,
 * there is no corresponding haplotype base and -1 is returned; -1 is also returned when the
 * coordinate lies beyond the span described by the cigar.
 *
 * @param haplotypeStart  reference coordinate where the haplotype's cigar begins
 * @param haplotypeCigar  cigar describing the haplotype relative to the reference
 * @param refCoord        reference coordinate to translate
 * @return the haplotype array index corresponding to refCoord, or -1 if it falls inside a
 *         deletion or past the end of the cigar
 */
private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) {
int readBases = 0;
int refBases = 0;
boolean fallsInsideDeletion = false;
int goal = refCoord - haplotypeStart; // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
Iterator<CigarElement> cigarElementIterator = haplotypeCigar.getCigarElements().iterator();
while (!goalReached && cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
int shift = 0;
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
if (refBases + cigarElement.getLength() < goal)
shift = cigarElement.getLength();
else
shift = goal - refBases;
refBases += shift;
}
goalReached = refBases == goal;
if (!goalReached && cigarElement.getOperator().consumesReadBases())
readBases += cigarElement.getLength();
if (goalReached) {
// Is this base's reference position within this cigar element? Or did we use it all?
boolean endsWithinCigar = shift < cigarElement.getLength();
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
if (!endsWithinCigar && !cigarElementIterator.hasNext())
return -1;
CigarElement nextCigarElement;
// if we end inside the current cigar element, we just have to check if it is a deletion
if (endsWithinCigar)
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
// if we end outside the current cigar element, we need to check if the next element is an insertion or deletion.
else {
nextCigarElement = cigarElementIterator.next();
// if it's an insertion, we need to clip the whole insertion before looking at the next element
if (nextCigarElement.getOperator() == CigarOperator.INSERTION) {
readBases += nextCigarElement.getLength();
if (!cigarElementIterator.hasNext())
return -1;
nextCigarElement = cigarElementIterator.next();
}
// if it's a deletion, we will pass the information on to be handled downstream.
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
}
// If we reached our goal outside a deletion, add the shift
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
readBases += shift;
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
// to add the shift of the current cigar element but go back to it's last element to return the last
// base before the deletion (see warning in function contracts)
else if (fallsInsideDeletion && !endsWithinCigar)
readBases += shift - 1;
// If we reached our goal inside a deletion then we must backtrack to the last base before the deletion
else if (fallsInsideDeletion && endsWithinCigar)
readBases--;
}
}
// coordinate was never reached: it lies beyond the span described by this cigar
if (!goalReached)
return -1;
return (fallsInsideDeletion ? -1 : readBases);
}
}

View File

@ -1613,4 +1613,36 @@ public class MathUtils {
}
/**
 * Converts a BitSet into the integer whose binary representation has a 1 bit
 * at every index that is set in the bitset.
 *
 * @param bitSet the bitset to convert
 * @return the integer encoded by the bitset
 */
public static int intFrom(final BitSet bitSet) {
    int result = 0;
    // walk every index up to the highest set bit; set bits contribute 2^index
    for (int bitIndex = 0; bitIndex < bitSet.length(); bitIndex++) {
        if (bitSet.get(bitIndex)) {
            result |= 1 << bitIndex;
        }
    }
    return result;
}
/**
 * Creates a BitSet representation of a given integer.
 *
 * The number of bits required to represent n is its bit length (roughly log2(n)),
 * so the BitSet is pre-sized accordingly via Integer.numberOfLeadingZeros; the
 * previous sqrt-based sizing hint was incorrect math, though harmless since
 * BitSet grows on demand.
 *
 * NOTE(review): a negative input skips the loop entirely and yields an empty
 * BitSet, so intFrom(bitSetFrom(n)) only round-trips for n >= 0 — confirm
 * callers never pass negative values.
 *
 * @param integer the number to turn into a bitset
 * @return a bitset representation of the integer
 */
public static BitSet bitSetFrom(int integer) {
    // bit length of the value (minimum 1 so that 0 still gets a sane initial size)
    final BitSet bitSet = new BitSet(Integer.SIZE - Integer.numberOfLeadingZeros(Math.max(integer, 1)));
    int bitIndex = 0;
    while (integer > 0) {
        if (integer % 2 > 0)       // low-order bit is set
            bitSet.set(bitIndex);
        bitIndex++;
        integer /= 2;              // shift right by one bit
    }
    return bitSet;
}
}

View File

@ -190,11 +190,12 @@ public class BaseRecalibration {
final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length-1); // need to strip off the error mode which was appended to the list of covariates
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
if( qualityScore == null ) {
qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
}
// BUGBUG: This caching seems to put the entire key set into memory which negates the benefits of storing the delta delta tables?
//Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
//if( qualityScore == null ) {
final byte qualityScore = performSequentialQualityCalculation( errorModel, fullCovariateKey );
// qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
//}
recalQuals[offset] = qualityScore;
}

View File

@ -205,6 +205,16 @@ public class MathUtilsUnitTest extends BaseTest {
}
}
@Test(enabled = true)
public void testIntAndBitSetConversion() {
    // round-trip a spread of values (zero, one, power of two, arbitrary) through bitSetFrom/intFrom
    final int[] values = { 428, 239847, 12726, 0, 1, 65536 };
    for ( final int value : values ) {
        Assert.assertEquals(value, MathUtils.intFrom(MathUtils.bitSetFrom(value)));
    }
}
private boolean hasUniqueElements(Object[] x) {
for (int i = 0; i < x.length; i++)
for (int j = i + 1; j < x.length; j++)
@ -220,10 +230,10 @@ public class MathUtilsUnitTest extends BaseTest {
return set.isEmpty();
}
// Debug helper: prints the boxed-integer array as space-separated values on one line.
private void p (Object []x) {
    // build the whole line first, then emit it with a trailing newline
    final StringBuilder line = new StringBuilder();
    for ( final Object v : x ) {
        line.append((Integer) v).append(' ');
    }
    System.out.println(line);
}
}

View File

@ -23,7 +23,7 @@ class ChunkVCF extends QScript {
@Input(shortName="N",fullName="numEntriesInChunk",doc="The number of variants per chunk",required=true)
var numEntries : Int = _
@Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.")
@Input(shortName="I",fullName="Intervals",doc="The SNP interval list to chunk. If not provided, one will be created for you to provide in a second run.",required=false)
var intervals : File = _
@Input(fullName="preserveChromosomes",doc="Restrict chunks to one chromosome (smaller chunk at end of chromosome)",required=false)
@ -40,8 +40,8 @@ class ChunkVCF extends QScript {
def script = {
if ( intervals == null ) {
// create an interval list from the VCF
val ivals : File = swapExt(variants,".vcf",".intervals.list")
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
val ivals : File = swapExt(inVCF,".vcf",".intervals.list")
val extract : VCFExtractIntervals = new VCFExtractIntervals(inVCF,ivals,false)
add(extract)
} else {
var chunkNum = 1
@ -54,11 +54,12 @@ class ChunkVCF extends QScript {
if ( ( preserve && ! int.split(":")(0).equals(chromosome) ) || numLinesInChunk > numEntries ) {
chunkWriter.close()
val chunkSelect : SelectVariants = new SelectVariants
chunkSelect.variant = inVCF
chunkSelect.reference_sequence = ref
chunkSelect.memoryLimit = 2
chunkSelect.intervals :+= chunkFile
if ( extractSamples != null )
chunkSelect.sample_file = extractSamples
chunkSelect.sample_file :+= extractSamples
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
add(chunkSelect)
chunkNum += 1
@ -74,12 +75,13 @@ class ChunkVCF extends QScript {
if ( numLinesInChunk > 0 ) {
// some work to do
val chunkSelect : SelectVariants = new SelectVariants
chunkSelect.variant = inVCF
chunkSelect.reference_sequence = ref
chunkSelect.memoryLimit = 2
chunkSelect.intervals :+= chunkFile
chunkWriter.close()
if ( extractSamples != null )
chunkSelect.sample_file = extractSamples
chunkSelect.sample_file :+= extractSamples
chunkSelect.out = swapExt(inVCF,".vcf",".chunk%d.vcf".format(chunkNum))
add(chunkSelect)
}

View File

@ -47,9 +47,14 @@ class VcfToPed extends QScript {
val extract : VCFExtractIntervals = new VCFExtractIntervals(variants,ivals,false)
add(extract)
} else {
val IS_GZ : Boolean = variants.getName.endsWith(".vcf.gz")
var iXRL = new XReadLines(intervals)
var chunk = 1;
var subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
var subListFile : File = null
if ( IS_GZ )
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
else
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
var subList = new PrintStream(subListFile)
var nL = 0;
var bedOuts : List[File] = Nil;
@ -58,7 +63,7 @@ class VcfToPed extends QScript {
while ( iXRL.hasNext ) {
subList.printf("%s%n",iXRL.next())
nL = nL + 1
if ( nL > 100000 ) {
if ( nL > 10000 ) {
val toPed : VariantsToPed = new VariantsToPed
toPed.memoryLimit = 2
toPed.reference_sequence = ref
@ -89,7 +94,10 @@ class VcfToPed extends QScript {
add(toPed)
subList.close()
chunk = chunk + 1
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
if ( IS_GZ )
subListFile = swapExt(tmpdir,variants,".vcf.gz",".chunk%d.list".format(chunk))
else
subListFile = swapExt(tmpdir,variants,".vcf",".chunk%d.list".format(chunk))
subList = new PrintStream(subListFile)
bedOuts :+= tBed
bimOuts :+= bim