diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 016fde574..3ade0eebf 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -433,4 +433,7 @@ public class VariantContextUtils { return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes); } + public static VariantContext modifyLocation(VariantContext vc, GenomeLoc loc) { + return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes()); + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index b974a4ebd..3fbfbc377 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -25,41 +25,46 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broad.tribble.vcf.VCFRecord; -import org.broad.tribble.vcf.VCFCodec; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeader; -import java.util.List; +import java.util.*; /** * Filters a lifted-over VCF file for ref bases that have been changed. */ -@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class)) +@Requires(value={},referenceMetaData=@RMD(name="variant",type= ReferenceOrderedDatum.class)) public class FilterLiftedVariants extends RodWalker { private VCFWriter writer; private long failedLocs = 0, totalLocs = 0; - public void initialize() {} + public void initialize() { + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); + Map vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); - private void filterAndWrite(char ref, VCFRecord record) { + writer = new VCFWriter(out, true); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples); + writer.writeHeader(vcfHeader); + } + + private void filterAndWrite(byte ref, VariantContext vc) { totalLocs++; - char recordRef = record.getReference().charAt(0); + byte recordRef = vc.getReference().getBases()[0]; if ( recordRef != ref ) { failedLocs++; } else { - if ( writer == null ) { - writer = new VCFWriter(out); - writer.writeHeader(record.getHeader()); - } - writer.addRecord(record); + writer.add(vc, new byte[]{ref}); } } @@ -67,10 +72,9 @@ public class FilterLiftedVariants extends RodWalker { if ( tracker == null ) return 0; - List rods = tracker.getReferenceMetaData("vcf"); - - for ( Object rod : rods ) - filterAndWrite((char)ref.getBase(), (VCFRecord)rod); + Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + for ( VariantContext vc : VCs ) + filterAndWrite(ref.getBase(), vc); return 0; } @@ -80,8 +84,7 @@ public class FilterLiftedVariants extends RodWalker { public Integer reduce(Integer value, Integer sum) { return 0; } public void onTraversalDone(Integer result) { - if ( writer != null ) - writer.close(); + writer.close(); System.out.println("Filtered " + failedLocs + " records out of " + totalLocs + " total records."); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 1308e5283..8ca828daf 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -26,15 +26,20 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broad.tribble.vcf.VCFRecord; -import org.broad.tribble.vcf.VCFCodec; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broad.tribble.vcf.VCFHeader; import java.io.File; -import java.util.List; +import java.util.*; import net.sf.picard.liftover.LiftOver; import net.sf.picard.util.Interval; @@ -44,7 +49,7 @@ import net.sf.samtools.SAMFileReader; /** * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. */ -@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class)) +@Requires(value={},referenceMetaData=@RMD(name="variant", type=ReferenceOrderedDatum.class)) public class LiftoverVariants extends RodWalker { @Argument(fullName="chain", shortName="chain", doc="Chain file", required=true) @@ -65,20 +70,23 @@ public class LiftoverVariants extends RodWalker { final SAMFileHeader toHeader = new SAMFileReader(NEW_SEQ_DICT).getFileHeader(); liftOver.validateToSequences(toHeader.getSequenceDictionary()); + + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); + Map vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); + + writer = new VCFWriter(out, true); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples); + writer.writeHeader(vcfHeader); } - private void convertAndWrite(VCFRecord record) { + private void convertAndWrite(VariantContext vc, ReferenceContext ref) { - final Interval fromInterval = new Interval(record.getChr(), record.getStart(), record.getEnd()); + final Interval fromInterval = new Interval(vc.getChr(), vc.getStart(), vc.getEnd()); final Interval toInterval = liftOver.liftOver(fromInterval); if ( toInterval != null ) { - record.setLocation(toInterval.getSequence(), toInterval.getStart()); - if ( writer == null ) { - writer = new VCFWriter(out); - writer.writeHeader(record.getHeader()); - } - writer.addRecord(record); + vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getEnd())); + writer.add(vc, new byte[]{ref.getBase()}); successfulIntervals++; } else { failedIntervals++; @@ -89,10 +97,9 @@ public class LiftoverVariants extends RodWalker { if ( tracker == null ) return 0; - List rods = tracker.getReferenceMetaData("vcf"); - - for ( Object rod : rods ) - convertAndWrite((VCFRecord)rod); + Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + for ( VariantContext vc : VCs ) + convertAndWrite(vc, ref); return 0; } @@ -102,8 +109,7 @@ public class LiftoverVariants extends RodWalker { public Integer reduce(Integer value, Integer sum) { return 0; } public void onTraversalDone(Integer result) { - if ( writer != null ) - writer.close(); + writer.close(); System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records."); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java index 6d37b48d5..d228de697 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java @@ -82,7 +82,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { hInfo.add(new VCFHeaderLine("source", "BeagleImputation")); // Open output file specified by output VCF ROD - vcfWriter = new VCFWriter(new File(OUTPUT_FILE)); + vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME)); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 470880c27..e74664018 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -168,10 +168,10 @@ public class VCFUtils { if (! (compLine).equalsExcludingDescription(compOther) ) throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other ); if ( ! compLine.getDescription().equals(compOther) ) - logger.warn(String.format("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine)); + if ( logger != null ) logger.warn(String.format("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine)); } else { // we are not equal, but we're not anything special either - logger.warn(String.format("Ignoring header line already in map: this header line = " + line + " already present header = " + other)); + if ( logger != null ) logger.warn(String.format("Ignoring header line already in map: this header line = " + line + " already present header = " + other)); } } else { line.setVersion(VCFHeaderVersion.VCF4_0); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java index 63047555e..367d4daf8 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java @@ -4,6 +4,7 @@ import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFHeaderLine; import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec; import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest; +import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.junit.Assert; import org.junit.Test; @@ -74,7 +75,7 @@ public class CombineVariantsUnitTest { ArrayList headers = new ArrayList(); headers.add(one); headers.add(two); - Set lines = CombineVariants.smartMergeHeaders(headers); + Set lines = VCFUtils.smartMergeHeaders(headers, null); Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size()); } @@ -85,7 +86,7 @@ public class CombineVariantsUnitTest { ArrayList headers = new ArrayList(); headers.add(one); headers.add(two); - Set lines = CombineVariants.smartMergeHeaders(headers); + Set lines = VCFUtils.smartMergeHeaders(headers, null); Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size()); } @@ -96,7 +97,7 @@ public class CombineVariantsUnitTest { ArrayList headers = new ArrayList(); headers.add(one); headers.add(two); - Set lines = CombineVariants.smartMergeHeaders(headers); + Set lines = VCFUtils.smartMergeHeaders(headers, null); Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size()); } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java new file mode 100755 index 000000000..9e1284332 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +/** + * Tests LiftoverVariants + */ +public class LiftoverVariantsIntegrationTest extends WalkerTest { + + @Test + public void testb36Tohg19() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T LiftoverVariants -o %s -R " + oneKGLocation + "reference/human_b36_both.fasta -B variant,vcf," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict " + oneKGLocation + "reference/human_g1k_v37.dict", + 1, + Arrays.asList("aa347e1d379ed090cbd54eef829611f9")); + executeTest("test b36 to hg19", spec); + } +} \ No newline at end of file diff --git a/perl/liftOverVCF.pl b/perl/liftOverVCF.pl index 42f56e632..f0934b9ca 100755 --- a/perl/liftOverVCF.pl +++ b/perl/liftOverVCF.pl @@ -34,7 +34,7 @@ my $unsorted_vcf = "$tmp_prefix.unsorted.vcf"; # lift over the file print "Lifting over the vcf..."; -my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B vcf,vcf,$in -o $unsorted_vcf -chain $chain -dict $newRef.dict"; +my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B variant,vcf,$in -o $unsorted_vcf -chain $chain -dict $newRef.dict"; system($cmd); # we need to sort the lifted over file now @@ -61,7 +61,7 @@ close(SORTED); # Filter the VCF for bad records print "\nFixing/removing bad records...\n"; -$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B vcf,vcf,$sorted_vcf -o $out"; +$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B variant,vcf,$sorted_vcf -o $out"; system($cmd); # clean up