deleting vcf
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3693 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4195fc5c4e
commit
aa20c52b88
|
|
@ -1,133 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.vcf;
|
|
||||||
|
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
|
||||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Combines VCF records from different sources; supports both full merges and set unions.
|
|
||||||
* Merge: combines multiple records into a single one; if sample names overlap then they are uniquified.
|
|
||||||
* Union: assumes each rod represents the same set of samples (although this is not enforced); using the
|
|
||||||
* priority list (if provided), emits a single record instance at every position represented in the rods.
|
|
||||||
*/
|
|
||||||
@Requires(value={})
|
|
||||||
public class VCFCombine extends RodWalker<Integer, Integer> {
|
|
||||||
// the types of combinations we currently allow
|
|
||||||
public enum ComboType { UNION, MERGE }
|
|
||||||
@Argument(fullName="combination_type", shortName="type", doc="combination type; MERGE are supported", required=true)
|
|
||||||
protected ComboType COMBO_TYPE;
|
|
||||||
|
|
||||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true)
|
|
||||||
protected String PRIORITY_STRING = null;
|
|
||||||
|
|
||||||
private VCFWriter vcfWriter = null;
|
|
||||||
private List<String> priority = null;
|
|
||||||
protected EnumSet<VariantContextUtils.MergeType> mergeOptions;
|
|
||||||
|
|
||||||
protected final static EnumSet<VariantContextUtils.MergeType> mergeTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES);
|
|
||||||
protected final static EnumSet<VariantContextUtils.MergeType> unionTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.PRIORITIZE_GENOTYPES);
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
|
|
||||||
//Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
|
||||||
//hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
|
||||||
|
|
||||||
vcfWriter = new VCFWriter(out);
|
|
||||||
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
|
|
||||||
|
|
||||||
validateAnnotateUnionArguments(priority);
|
|
||||||
mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions;
|
|
||||||
Set<String> samples = getSampleList(SampleUtils.getRodsWithVCFHeader(getToolkit(), null), mergeOptions);
|
|
||||||
|
|
||||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
|
||||||
metaData.add(new VCFHeaderLine("source", "VCFCombine"));
|
|
||||||
vcfWriter.writeHeader(new VCFHeader(metaData, samples));
|
|
||||||
}
|
|
||||||
|
|
||||||
private Set<String> getSampleList(Map<String, VCFHeader> headers, EnumSet<VariantContextUtils.MergeType> mergeOptions ) {
|
|
||||||
Set<String> samples = new HashSet<String>();
|
|
||||||
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
|
|
||||||
VCFHeader header = val.getValue();
|
|
||||||
for ( String sample : header.getGenotypeSamples() ) {
|
|
||||||
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOptions.contains(VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return samples;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void validateAnnotateUnionArguments(List<String> priority) {
|
|
||||||
Set<String> rodNames = SampleUtils.getRodsNamesWithVCFHeader(getToolkit(), null);
|
|
||||||
if ( priority == null || rodNames.size() != priority.size() )
|
|
||||||
throw new StingException("A complete priority list must be provided when annotateUnion is provided");
|
|
||||||
|
|
||||||
if ( ! rodNames.containsAll(rodNames) )
|
|
||||||
throw new StingException("Not all priority elements provided as input RODs: " + PRIORITY_STRING);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
// get all of the vcf rods at this locus
|
|
||||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
|
||||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true);
|
|
||||||
if ( mergedVC != null ) // only operate at the start of events
|
|
||||||
if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed
|
|
||||||
vcfWriter.add(mergedVC, ref.getBases());
|
|
||||||
else
|
|
||||||
logger.info(String.format("Ignoring complex event: " + mergedVC));
|
|
||||||
|
|
||||||
return vcs.isEmpty() ? 0 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduce(Integer counter, Integer sum) {
|
|
||||||
return counter + sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(Integer sum) {
|
|
||||||
if ( vcfWriter != null )
|
|
||||||
vcfWriter.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,88 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.vcf;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broad.tribble.vcf.VCFRecord;
|
|
||||||
import org.broad.tribble.vcf.VCFCodec;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Filters a lifted-over VCF file for ref bases that have been changed.
|
|
||||||
*/
|
|
||||||
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
|
||||||
public class FilterLiftedVCF extends RodWalker<Integer, Integer> {
|
|
||||||
|
|
||||||
private VCFWriter writer;
|
|
||||||
|
|
||||||
private long failedLocs = 0, totalLocs = 0;
|
|
||||||
|
|
||||||
public void initialize() {}
|
|
||||||
|
|
||||||
private void filterAndWrite(char ref, VCFRecord record) {
|
|
||||||
|
|
||||||
totalLocs++;
|
|
||||||
|
|
||||||
char recordRef = record.getReference().charAt(0);
|
|
||||||
|
|
||||||
if ( recordRef != ref ) {
|
|
||||||
failedLocs++;
|
|
||||||
} else {
|
|
||||||
if ( writer == null ) {
|
|
||||||
writer = new VCFWriter(out);
|
|
||||||
writer.writeHeader(record.getHeader());
|
|
||||||
}
|
|
||||||
writer.addRecord(record);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
if ( tracker == null )
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
List<Object> rods = tracker.getReferenceMetaData("vcf");
|
|
||||||
|
|
||||||
for ( Object rod : rods )
|
|
||||||
filterAndWrite((char)ref.getBase(), (VCFRecord)rod);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() { return 0; }
|
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) { return 0; }
|
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
|
||||||
if ( writer != null )
|
|
||||||
writer.close();
|
|
||||||
System.out.println("Filtered " + failedLocs + " records out of " + totalLocs + " total records.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,110 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.vcf;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broad.tribble.vcf.VCFRecord;
|
|
||||||
import org.broad.tribble.vcf.VCFCodec;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import net.sf.picard.liftover.LiftOver;
|
|
||||||
import net.sf.picard.util.Interval;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted.
|
|
||||||
*/
|
|
||||||
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
|
||||||
public class LiftoverVCF extends RodWalker<Integer, Integer> {
|
|
||||||
|
|
||||||
@Argument(fullName="chain", shortName="chain", doc="Chain file", required=true)
|
|
||||||
protected File CHAIN = null;
|
|
||||||
|
|
||||||
@Argument(fullName="newSequenceDictionary", shortName="dict", doc="Sequence .dict file for the new build", required=true)
|
|
||||||
protected File NEW_SEQ_DICT = null;
|
|
||||||
|
|
||||||
private VCFWriter writer;
|
|
||||||
|
|
||||||
private LiftOver liftOver;
|
|
||||||
|
|
||||||
private long successfulIntervals = 0, failedIntervals = 0;
|
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
liftOver = new LiftOver(CHAIN);
|
|
||||||
liftOver.setLiftOverMinMatch(LiftOver.DEFAULT_LIFTOVER_MINMATCH);
|
|
||||||
|
|
||||||
final SAMFileHeader toHeader = new SAMFileReader(NEW_SEQ_DICT).getFileHeader();
|
|
||||||
liftOver.validateToSequences(toHeader.getSequenceDictionary());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void convertAndWrite(VCFRecord record) {
|
|
||||||
|
|
||||||
final Interval fromInterval = new Interval(record.getChr(), record.getStart(), record.getEnd());
|
|
||||||
final Interval toInterval = liftOver.liftOver(fromInterval);
|
|
||||||
|
|
||||||
if ( toInterval != null ) {
|
|
||||||
record.setLocation(toInterval.getSequence(), toInterval.getStart());
|
|
||||||
if ( writer == null ) {
|
|
||||||
writer = new VCFWriter(out);
|
|
||||||
writer.writeHeader(record.getHeader());
|
|
||||||
}
|
|
||||||
writer.addRecord(record);
|
|
||||||
successfulIntervals++;
|
|
||||||
} else {
|
|
||||||
failedIntervals++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
if ( tracker == null )
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
List<Object> rods = tracker.getReferenceMetaData("vcf");
|
|
||||||
|
|
||||||
for ( Object rod : rods )
|
|
||||||
convertAndWrite((VCFRecord)rod);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduceInit() { return 0; }
|
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) { return 0; }
|
|
||||||
|
|
||||||
public void onTraversalDone(Integer result) {
|
|
||||||
if ( writer != null )
|
|
||||||
writer.close();
|
|
||||||
System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Loading…
Reference in New Issue