diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java new file mode 100755 index 000000000..8ef032f35 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.vcf; + +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.genotype.vcf.*; + +import java.util.*; + +/** + * Combines VCF records from different sources; supports both full merges and set unions. + * Merge: combines multiple records into a single one; if sample names overlap then they are uniquified. + * Union: assumes each rod represents the same set of samples (although this is not enforced); using the + * priority list (if provided), emits a single record instance at every position represented in the rods. + */ +@Requires(value={}) +public class VCFCombine extends RodWalker { + // the types of combinations we currently allow + public enum ComboType { UNION, MERGE } + @Argument(fullName="combination_type", shortName="type", doc="combination type; MERGE are supported", required=true) + protected ComboType COMBO_TYPE; + + @Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true) + protected String PRIORITY_STRING = null; + + private VCFWriter vcfWriter = null; + private List priority = null; + protected EnumSet mergeOptions; + + protected final static EnumSet mergeTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES); + protected final static EnumSet unionTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.PRIORITIZE_GENOTYPES); + + public void initialize() { + + //Set hInfo = new HashSet(); + //hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + + vcfWriter = new VCFWriter(out); + priority = new ArrayList(Arrays.asList(PRIORITY_STRING.split(","))); + + validateAnnotateUnionArguments(priority); + mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions; + Set samples = getSampleList(SampleUtils.getRodsWithVCFHeader(getToolkit(), null), mergeOptions); + + Set metaData = new HashSet(); + metaData.add(new VCFHeaderLine("source", "VCFCombine")); + vcfWriter.writeHeader(new VCFHeader(metaData, samples)); + } + + private Set getSampleList(Map headers, EnumSet mergeOptions ) { + Set samples = new HashSet(); + for ( Map.Entry val : headers.entrySet() ) { + VCFHeader header = val.getValue(); + for ( String sample : header.getGenotypeSamples() ) { + samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOptions.contains(VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES))); + } + } + + return samples; + } + + private void validateAnnotateUnionArguments(List priority) { + Set rodNames = SampleUtils.getRodsNamesWithVCFHeader(getToolkit(), null); + if ( priority == null || rodNames.size() != priority.size() ) + throw new StingException("A complete priority list must be provided when annotateUnion is provided"); + + if ( ! rodNames.containsAll(rodNames) ) + throw new StingException("Not all priority elements provided as input RODs: " + PRIORITY_STRING); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) // RodWalkers can make funky map calls + return 0; + + // get all of the vcf rods at this locus + Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); + VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true); + if ( mergedVC != null ) // only operate at the start of events + if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed + vcfWriter.add(mergedVC, ref.getBases()); + else + logger.info(String.format("Ignoring complex event: " + mergedVC)); + + return vcs.isEmpty() ? 0 : 1; + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + public void onTraversalDone(Integer sum) { + if ( vcfWriter != null ) + vcfWriter.close(); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVCF.java new file mode 100755 index 000000000..c30dc1ee2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVCF.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.vcf; + +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFCodec; + +import java.util.List; + +/** + * Filters a lifted-over VCF file for ref bases that have been changed. + */ +@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class)) +public class FilterLiftedVCF extends RodWalker { + + private VCFWriter writer; + + private long failedLocs = 0, totalLocs = 0; + + public void initialize() {} + + private void filterAndWrite(char ref, VCFRecord record) { + + totalLocs++; + + char recordRef = record.getReference().charAt(0); + + if ( recordRef != ref ) { + failedLocs++; + } else { + if ( writer == null ) { + writer = new VCFWriter(out); + writer.writeHeader(record.getHeader()); + } + writer.addRecord(record); + } + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return 0; + + List rods = tracker.getReferenceMetaData("vcf"); + + for ( Object rod : rods ) + filterAndWrite((char)ref.getBase(), (VCFRecord)rod); + + return 0; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { return 0; } + + public void onTraversalDone(Integer result) { + if ( writer != null ) + writer.close(); + System.out.println("Filtered " + failedLocs + " records out of " + totalLocs + " total records."); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVCF.java new file mode 100755 index 000000000..d2700f1a4 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVCF.java @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.vcf; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFCodec; + +import java.io.File; +import java.util.List; + +import net.sf.picard.liftover.LiftOver; +import net.sf.picard.util.Interval; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileReader; + +/** + * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. + */ +@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class)) +public class LiftoverVCF extends RodWalker { + + @Argument(fullName="chain", shortName="chain", doc="Chain file", required=true) + protected File CHAIN = null; + + @Argument(fullName="newSequenceDictionary", shortName="dict", doc="Sequence .dict file for the new build", required=true) + protected File NEW_SEQ_DICT = null; + + private VCFWriter writer; + + private LiftOver liftOver; + + private long successfulIntervals = 0, failedIntervals = 0; + + public void initialize() { + liftOver = new LiftOver(CHAIN); + liftOver.setLiftOverMinMatch(LiftOver.DEFAULT_LIFTOVER_MINMATCH); + + final SAMFileHeader toHeader = new SAMFileReader(NEW_SEQ_DICT).getFileHeader(); + liftOver.validateToSequences(toHeader.getSequenceDictionary()); + } + + private void convertAndWrite(VCFRecord record) { + + final Interval fromInterval = new Interval(record.getChr(), record.getStart(), record.getEnd()); + final Interval toInterval = liftOver.liftOver(fromInterval); + + if ( toInterval != null ) { + record.setLocation(toInterval.getSequence(), toInterval.getStart()); + if ( writer == null ) { + writer = new VCFWriter(out); + writer.writeHeader(record.getHeader()); + } + writer.addRecord(record); + successfulIntervals++; + } else { + failedIntervals++; + } + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return 0; + + List rods = tracker.getReferenceMetaData("vcf"); + + for ( Object rod : rods ) + convertAndWrite((VCFRecord)rod); + + return 0; + } + + public Integer reduceInit() { return 0; } + + public Integer reduce(Integer value, Integer sum) { return 0; } + + public void onTraversalDone(Integer result) { + if ( writer != null ) + writer.close(); + System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records."); + } +}