Hooking up the liftover tool to the new on-the-fly sorting VCF writer so that records can now get emitted in order.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4499 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d41c252b13
commit
9f54170dff
|
|
@ -25,18 +25,20 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broad.tribble.vcf.VCFWriter;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
|
import org.broad.tribble.vcf.SortingVCFWriter;
|
||||||
|
import org.broad.tribble.TribbleException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -53,7 +55,8 @@ import net.sf.samtools.SAMFileReader;
|
||||||
public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
@Output(doc="File to which variants should be written",required=true)
|
@Output(doc="File to which variants should be written",required=true)
|
||||||
protected VCFWriter writer = null;
|
protected File file = null;
|
||||||
|
protected SortingVCFWriter writer = null;
|
||||||
|
|
||||||
@Argument(fullName="chain", shortName="chain", doc="Chain file", required=true)
|
@Argument(fullName="chain", shortName="chain", doc="Chain file", required=true)
|
||||||
protected File CHAIN = null;
|
protected File CHAIN = null;
|
||||||
|
|
@ -61,6 +64,9 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="newSequenceDictionary", shortName="dict", doc="Sequence .dict file for the new build", required=true)
|
@Argument(fullName="newSequenceDictionary", shortName="dict", doc="Sequence .dict file for the new build", required=true)
|
||||||
protected File NEW_SEQ_DICT = null;
|
protected File NEW_SEQ_DICT = null;
|
||||||
|
|
||||||
|
@Argument(fullName="maxCachingDistance", shortName="cache", doc="Maximum genomic distance for which we will cache variants", required=false)
|
||||||
|
protected Integer maxCachingDistance = 50000;
|
||||||
|
|
||||||
private LiftOver liftOver;
|
private LiftOver liftOver;
|
||||||
|
|
||||||
private long successfulIntervals = 0, failedIntervals = 0;
|
private long successfulIntervals = 0, failedIntervals = 0;
|
||||||
|
|
@ -76,6 +82,8 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||||
|
|
||||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
||||||
|
|
||||||
|
writer = new SortingVCFWriter(file, maxCachingDistance);
|
||||||
writer.writeHeader(vcfHeader);
|
writer.writeHeader(vcfHeader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -87,7 +95,11 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
if ( toInterval != null ) {
|
if ( toInterval != null ) {
|
||||||
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
|
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
|
||||||
writer.add(vc, ref.getBase());
|
try {
|
||||||
|
writer.add(vc, ref.getBase());
|
||||||
|
} catch (TribbleException e) {
|
||||||
|
throw new UserException(e.getMessage());
|
||||||
|
}
|
||||||
successfulIntervals++;
|
successfulIntervals++;
|
||||||
} else {
|
} else {
|
||||||
failedIntervals++;
|
failedIntervals++;
|
||||||
|
|
@ -110,6 +122,8 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
public Integer reduce(Integer value, Integer sum) { return 0; }
|
public Integer reduce(Integer value, Integer sum) { return 0; }
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
|
writer.flush();
|
||||||
|
writer.close();
|
||||||
System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records.");
|
System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testb36Tohg19() {
|
public void testb36Tohg19() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict -NO_HEADER",
|
"-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("1637877892a019061e74eb3d9a9d100f"));
|
Arrays.asList("1637877892a019061e74eb3d9a9d100f"));
|
||||||
executeTest("test b36 to hg19", spec);
|
executeTest("test b36 to hg19", spec);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue