Hooking up the liftover tool to the new on-the-fly sorting VCF writer so that records can now get emitted in order.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4499 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-10-15 07:27:01 +00:00
parent d41c252b13
commit 9f54170dff
2 changed files with 18 additions and 4 deletions

View File

@ -25,18 +25,20 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.SortingVCFWriter;
import org.broad.tribble.TribbleException;
import java.io.File;
import java.util.*;
@ -53,7 +55,8 @@ import net.sf.samtools.SAMFileReader;
public class LiftoverVariants extends RodWalker<Integer, Integer> {
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter writer = null;
protected File file = null;
protected SortingVCFWriter writer = null;
@Argument(fullName="chain", shortName="chain", doc="Chain file", required=true)
protected File CHAIN = null;
@ -61,6 +64,9 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="newSequenceDictionary", shortName="dict", doc="Sequence .dict file for the new build", required=true)
protected File NEW_SEQ_DICT = null;
@Argument(fullName="maxCachingDistance", shortName="cache", doc="Maximum genomic distance for which we will cache variants", required=false)
protected Integer maxCachingDistance = 50000;
private LiftOver liftOver;
private long successfulIntervals = 0, failedIntervals = 0;
@ -76,6 +82,8 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
writer = new SortingVCFWriter(file, maxCachingDistance);
writer.writeHeader(vcfHeader);
}
@ -87,7 +95,11 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
if ( toInterval != null ) {
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
writer.add(vc, ref.getBase());
try {
writer.add(vc, ref.getBase());
} catch (TribbleException e) {
throw new UserException(e.getMessage());
}
successfulIntervals++;
} else {
failedIntervals++;
@ -110,6 +122,8 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
public Integer reduce(Integer value, Integer sum) { return 0; }
public void onTraversalDone(Integer result) {
writer.flush();
writer.close();
System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records.");
}
}

View File

@ -38,7 +38,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
@Test
public void testb36Tohg19() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict -NO_HEADER",
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("1637877892a019061e74eb3d9a9d100f"));
executeTest("test b36 to hg19", spec);