1. Updated tests and added integration test for liftover code.

2. Updated liftover code (and scripts) to emit vcf 4.0 and no longer depend on VCFRecord.
3. Beagle walker now also emits vcf 4.0.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3767 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-07-12 17:58:18 +00:00
parent 2a7112302a
commit e50627a49e
8 changed files with 103 additions and 44 deletions

View File

@ -433,4 +433,7 @@ public class VariantContextUtils {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes);
}
/**
 * Returns a copy of the given VariantContext with its genomic location replaced.
 * Every other field — name, alleles, genotypes, neg-log10 error, filters and
 * attributes — is carried over unchanged from the source context.
 *
 * @param vc  the source VariantContext (not modified)
 * @param loc the new location for the copy
 * @return a new VariantContext identical to {@code vc} except for its location
 */
public static VariantContext modifyLocation(VariantContext vc, GenomeLoc loc) {
return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), vc.getAttributes());
}
}

View File

@ -25,41 +25,46 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.VCFCodec;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
import java.util.List;
import java.util.*;
/**
* Filters a lifted-over VCF file for ref bases that have been changed.
*/
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
@Requires(value={},referenceMetaData=@RMD(name="variant",type= ReferenceOrderedDatum.class))
public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
private VCFWriter writer;
private long failedLocs = 0, totalLocs = 0;
public void initialize() {}
public void initialize() {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
private void filterAndWrite(char ref, VCFRecord record) {
writer = new VCFWriter(out, true);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
writer.writeHeader(vcfHeader);
}
private void filterAndWrite(byte ref, VariantContext vc) {
totalLocs++;
char recordRef = record.getReference().charAt(0);
byte recordRef = vc.getReference().getBases()[0];
if ( recordRef != ref ) {
failedLocs++;
} else {
if ( writer == null ) {
writer = new VCFWriter(out);
writer.writeHeader(record.getHeader());
}
writer.addRecord(record);
writer.add(vc, new byte[]{ref});
}
}
@ -67,10 +72,9 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
List<Object> rods = tracker.getReferenceMetaData("vcf");
for ( Object rod : rods )
filterAndWrite((char)ref.getBase(), (VCFRecord)rod);
Collection<VariantContext> VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
for ( VariantContext vc : VCs )
filterAndWrite(ref.getBase(), vc);
return 0;
}
@ -80,8 +84,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
public Integer reduce(Integer value, Integer sum) { return 0; }
public void onTraversalDone(Integer result) {
if ( writer != null )
writer.close();
writer.close();
System.out.println("Filtered " + failedLocs + " records out of " + totalLocs + " total records.");
}
}

View File

@ -26,15 +26,20 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.VCFCodec;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broad.tribble.vcf.VCFHeader;
import java.io.File;
import java.util.List;
import java.util.*;
import net.sf.picard.liftover.LiftOver;
import net.sf.picard.util.Interval;
@ -44,7 +49,7 @@ import net.sf.samtools.SAMFileReader;
/**
* Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted.
*/
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
@Requires(value={},referenceMetaData=@RMD(name="variant", type=ReferenceOrderedDatum.class))
public class LiftoverVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="chain", shortName="chain", doc="Chain file", required=true)
@ -65,20 +70,23 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
final SAMFileHeader toHeader = new SAMFileReader(NEW_SEQ_DICT).getFileHeader();
liftOver.validateToSequences(toHeader.getSequenceDictionary());
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
writer = new VCFWriter(out, true);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
writer.writeHeader(vcfHeader);
}
private void convertAndWrite(VCFRecord record) {
private void convertAndWrite(VariantContext vc, ReferenceContext ref) {
final Interval fromInterval = new Interval(record.getChr(), record.getStart(), record.getEnd());
final Interval fromInterval = new Interval(vc.getChr(), vc.getStart(), vc.getEnd());
final Interval toInterval = liftOver.liftOver(fromInterval);
if ( toInterval != null ) {
record.setLocation(toInterval.getSequence(), toInterval.getStart());
if ( writer == null ) {
writer = new VCFWriter(out);
writer.writeHeader(record.getHeader());
}
writer.addRecord(record);
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getEnd()));
writer.add(vc, new byte[]{ref.getBase()});
successfulIntervals++;
} else {
failedIntervals++;
@ -89,10 +97,9 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
List<Object> rods = tracker.getReferenceMetaData("vcf");
for ( Object rod : rods )
convertAndWrite((VCFRecord)rod);
Collection<VariantContext> VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false);
for ( VariantContext vc : VCs )
convertAndWrite(vc, ref);
return 0;
}
@ -102,8 +109,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
public Integer reduce(Integer value, Integer sum) { return 0; }
public void onTraversalDone(Integer result) {
if ( writer != null )
writer.close();
writer.close();
System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records.");
}
}

View File

@ -82,7 +82,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
hInfo.add(new VCFHeaderLine("source", "BeagleImputation"));
// Open output file specified by output VCF ROD
vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
vcfWriter = new VCFWriter(new File(OUTPUT_FILE), true);
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME));
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);

View File

@ -168,10 +168,10 @@ public class VCFUtils {
if (! (compLine).equalsExcludingDescription(compOther) )
throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
if ( ! compLine.getDescription().equals(compOther) )
logger.warn(String.format("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine));
if ( logger != null ) logger.warn(String.format("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine));
} else {
// we are not equal, but we're not anything special either
logger.warn(String.format("Ignoring header line already in map: this header line = " + line + " already present header = " + other));
if ( logger != null ) logger.warn(String.format("Ignoring header line already in map: this header line = " + line + " already present header = " + other));
}
} else {
line.setVersion(VCFHeaderVersion.VCF4_0);

View File

@ -4,6 +4,7 @@ import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.junit.Assert;
import org.junit.Test;
@ -74,7 +75,7 @@ public class CombineVariantsUnitTest {
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
Set<VCFHeaderLine> lines = CombineVariants.smartMergeHeaders(headers);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size());
}
@ -85,7 +86,7 @@ public class CombineVariantsUnitTest {
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
Set<VCFHeaderLine> lines = CombineVariants.smartMergeHeaders(headers);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size());
}
@ -96,7 +97,7 @@ public class CombineVariantsUnitTest {
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
Set<VCFHeaderLine> lines = CombineVariants.smartMergeHeaders(headers);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size());
}
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
/**
* Tests LiftoverVariants
*/
/**
 * Integration test for the LiftoverVariants walker: lifts a b36 trio VCF
 * over to the hg19 build and checks the walker's output against a known MD5.
 */
public class LiftoverVariantsIntegrationTest extends WalkerTest {
    @Test
    public void testb36Tohg19() {
        // Full command line: lift the b36 trio VCF to hg19 via the Broad chain file,
        // writing the result against the hg19 (g1k v37) sequence dictionary.
        final String cmdLine = "-T LiftoverVariants -o %s -R " + oneKGLocation + "reference/human_b36_both.fasta"
                + " -B variant,vcf," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf"
                + " -chain " + validationDataLocation + "b36ToHg19.broad.over.chain"
                + " -dict " + oneKGLocation + "reference/human_g1k_v37.dict";
        // One expected output file, pinned to the known-good MD5 checksum.
        final WalkerTestSpec testSpec =
                new WalkerTestSpec(cmdLine, 1, Arrays.asList("aa347e1d379ed090cbd54eef829611f9"));
        executeTest("test b36 to hg19", testSpec);
    }
}

View File

@ -34,7 +34,7 @@ my $unsorted_vcf = "$tmp_prefix.unsorted.vcf";
# lift over the file
print "Lifting over the vcf...";
my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B vcf,vcf,$in -o $unsorted_vcf -chain $chain -dict $newRef.dict";
my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B variant,vcf,$in -o $unsorted_vcf -chain $chain -dict $newRef.dict";
system($cmd);
# we need to sort the lifted over file now
@ -61,7 +61,7 @@ close(SORTED);
# Filter the VCF for bad records
print "\nFixing/removing bad records...\n";
$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B vcf,vcf,$sorted_vcf -o $out";
$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B variant,vcf,$sorted_vcf -o $out";
system($cmd);
# clean up