deleting vcf
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3693 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4195fc5c4e
commit
aa20c52b88
|
|
@ -1,133 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Combines VCF records from different sources; supports both full merges and set unions.
|
||||
* Merge: combines multiple records into a single one; if sample names overlap then they are uniquified.
|
||||
* Union: assumes each rod represents the same set of samples (although this is not enforced); using the
|
||||
* priority list (if provided), emits a single record instance at every position represented in the rods.
|
||||
*/
|
||||
@Requires(value={})
|
||||
public class VCFCombine extends RodWalker<Integer, Integer> {
|
||||
// the types of combinations we currently allow
|
||||
public enum ComboType { UNION, MERGE }
|
||||
@Argument(fullName="combination_type", shortName="type", doc="combination type; MERGE are supported", required=true)
|
||||
protected ComboType COMBO_TYPE;
|
||||
|
||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true)
|
||||
protected String PRIORITY_STRING = null;
|
||||
|
||||
private VCFWriter vcfWriter = null;
|
||||
private List<String> priority = null;
|
||||
protected EnumSet<VariantContextUtils.MergeType> mergeOptions;
|
||||
|
||||
protected final static EnumSet<VariantContextUtils.MergeType> mergeTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES);
|
||||
protected final static EnumSet<VariantContextUtils.MergeType> unionTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.PRIORITIZE_GENOTYPES);
|
||||
|
||||
public void initialize() {
|
||||
|
||||
//Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
//hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
||||
vcfWriter = new VCFWriter(out);
|
||||
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
|
||||
|
||||
validateAnnotateUnionArguments(priority);
|
||||
mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions;
|
||||
Set<String> samples = getSampleList(SampleUtils.getRodsWithVCFHeader(getToolkit(), null), mergeOptions);
|
||||
|
||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
metaData.add(new VCFHeaderLine("source", "VCFCombine"));
|
||||
vcfWriter.writeHeader(new VCFHeader(metaData, samples));
|
||||
}
|
||||
|
||||
private Set<String> getSampleList(Map<String, VCFHeader> headers, EnumSet<VariantContextUtils.MergeType> mergeOptions ) {
|
||||
Set<String> samples = new HashSet<String>();
|
||||
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
|
||||
VCFHeader header = val.getValue();
|
||||
for ( String sample : header.getGenotypeSamples() ) {
|
||||
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOptions.contains(VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES)));
|
||||
}
|
||||
}
|
||||
|
||||
return samples;
|
||||
}
|
||||
|
||||
private void validateAnnotateUnionArguments(List<String> priority) {
|
||||
Set<String> rodNames = SampleUtils.getRodsNamesWithVCFHeader(getToolkit(), null);
|
||||
if ( priority == null || rodNames.size() != priority.size() )
|
||||
throw new StingException("A complete priority list must be provided when annotateUnion is provided");
|
||||
|
||||
if ( ! rodNames.containsAll(rodNames) )
|
||||
throw new StingException("Not all priority elements provided as input RODs: " + PRIORITY_STRING);
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
// get all of the vcf rods at this locus
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true);
|
||||
if ( mergedVC != null ) // only operate at the start of events
|
||||
if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed
|
||||
vcfWriter.add(mergedVC, ref.getBases());
|
||||
else
|
||||
logger.info(String.format("Ignoring complex event: " + mergedVC));
|
||||
|
||||
return vcs.isEmpty() ? 0 : 1;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer counter, Integer sum) {
|
||||
return counter + sum;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
if ( vcfWriter != null )
|
||||
vcfWriter.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Filters a lifted-over VCF file for ref bases that have been changed.
|
||||
*/
|
||||
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
||||
public class FilterLiftedVCF extends RodWalker<Integer, Integer> {
|
||||
|
||||
private VCFWriter writer;
|
||||
|
||||
private long failedLocs = 0, totalLocs = 0;
|
||||
|
||||
public void initialize() {}
|
||||
|
||||
private void filterAndWrite(char ref, VCFRecord record) {
|
||||
|
||||
totalLocs++;
|
||||
|
||||
char recordRef = record.getReference().charAt(0);
|
||||
|
||||
if ( recordRef != ref ) {
|
||||
failedLocs++;
|
||||
} else {
|
||||
if ( writer == null ) {
|
||||
writer = new VCFWriter(out);
|
||||
writer.writeHeader(record.getHeader());
|
||||
}
|
||||
writer.addRecord(record);
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
List<Object> rods = tracker.getReferenceMetaData("vcf");
|
||||
|
||||
for ( Object rod : rods )
|
||||
filterAndWrite((char)ref.getBase(), (VCFRecord)rod);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) { return 0; }
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
if ( writer != null )
|
||||
writer.close();
|
||||
System.out.println("Filtered " + failedLocs + " records out of " + totalLocs + " total records.");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.vcf;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.picard.liftover.LiftOver;
|
||||
import net.sf.picard.util.Interval;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
|
||||
/**
|
||||
* Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted.
|
||||
*/
|
||||
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
||||
public class LiftoverVCF extends RodWalker<Integer, Integer> {
|
||||
|
||||
@Argument(fullName="chain", shortName="chain", doc="Chain file", required=true)
|
||||
protected File CHAIN = null;
|
||||
|
||||
@Argument(fullName="newSequenceDictionary", shortName="dict", doc="Sequence .dict file for the new build", required=true)
|
||||
protected File NEW_SEQ_DICT = null;
|
||||
|
||||
private VCFWriter writer;
|
||||
|
||||
private LiftOver liftOver;
|
||||
|
||||
private long successfulIntervals = 0, failedIntervals = 0;
|
||||
|
||||
public void initialize() {
|
||||
liftOver = new LiftOver(CHAIN);
|
||||
liftOver.setLiftOverMinMatch(LiftOver.DEFAULT_LIFTOVER_MINMATCH);
|
||||
|
||||
final SAMFileHeader toHeader = new SAMFileReader(NEW_SEQ_DICT).getFileHeader();
|
||||
liftOver.validateToSequences(toHeader.getSequenceDictionary());
|
||||
}
|
||||
|
||||
private void convertAndWrite(VCFRecord record) {
|
||||
|
||||
final Interval fromInterval = new Interval(record.getChr(), record.getStart(), record.getEnd());
|
||||
final Interval toInterval = liftOver.liftOver(fromInterval);
|
||||
|
||||
if ( toInterval != null ) {
|
||||
record.setLocation(toInterval.getSequence(), toInterval.getStart());
|
||||
if ( writer == null ) {
|
||||
writer = new VCFWriter(out);
|
||||
writer.writeHeader(record.getHeader());
|
||||
}
|
||||
writer.addRecord(record);
|
||||
successfulIntervals++;
|
||||
} else {
|
||||
failedIntervals++;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
List<Object> rods = tracker.getReferenceMetaData("vcf");
|
||||
|
||||
for ( Object rod : rods )
|
||||
convertAndWrite((VCFRecord)rod);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) { return 0; }
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
if ( writer != null )
|
||||
writer.close();
|
||||
System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records.");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue