renaming part 2...
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3692 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6c9da5525d
commit
4195fc5c4e
|
|
@ -0,0 +1,133 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.vcf;
|
||||||
|
|
||||||
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
|
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Combines VCF records from different sources; supports both full merges and set unions.
|
||||||
|
* Merge: combines multiple records into a single one; if sample names overlap then they are uniquified.
|
||||||
|
* Union: assumes each rod represents the same set of samples (although this is not enforced); using the
|
||||||
|
* priority list (if provided), emits a single record instance at every position represented in the rods.
|
||||||
|
*/
|
||||||
|
@Requires(value={})
|
||||||
|
public class VCFCombine extends RodWalker<Integer, Integer> {
|
||||||
|
// the types of combinations we currently allow
|
||||||
|
public enum ComboType { UNION, MERGE }
|
||||||
|
@Argument(fullName="combination_type", shortName="type", doc="combination type; MERGE are supported", required=true)
|
||||||
|
protected ComboType COMBO_TYPE;
|
||||||
|
|
||||||
|
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true)
|
||||||
|
protected String PRIORITY_STRING = null;
|
||||||
|
|
||||||
|
private VCFWriter vcfWriter = null;
|
||||||
|
private List<String> priority = null;
|
||||||
|
protected EnumSet<VariantContextUtils.MergeType> mergeOptions;
|
||||||
|
|
||||||
|
protected final static EnumSet<VariantContextUtils.MergeType> mergeTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES);
|
||||||
|
protected final static EnumSet<VariantContextUtils.MergeType> unionTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.PRIORITIZE_GENOTYPES);
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
|
||||||
|
//Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
|
//hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||||
|
|
||||||
|
vcfWriter = new VCFWriter(out);
|
||||||
|
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
|
||||||
|
|
||||||
|
validateAnnotateUnionArguments(priority);
|
||||||
|
mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions;
|
||||||
|
Set<String> samples = getSampleList(SampleUtils.getRodsWithVCFHeader(getToolkit(), null), mergeOptions);
|
||||||
|
|
||||||
|
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||||
|
metaData.add(new VCFHeaderLine("source", "VCFCombine"));
|
||||||
|
vcfWriter.writeHeader(new VCFHeader(metaData, samples));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Set<String> getSampleList(Map<String, VCFHeader> headers, EnumSet<VariantContextUtils.MergeType> mergeOptions ) {
|
||||||
|
Set<String> samples = new HashSet<String>();
|
||||||
|
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
|
||||||
|
VCFHeader header = val.getValue();
|
||||||
|
for ( String sample : header.getGenotypeSamples() ) {
|
||||||
|
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOptions.contains(VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return samples;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateAnnotateUnionArguments(List<String> priority) {
|
||||||
|
Set<String> rodNames = SampleUtils.getRodsNamesWithVCFHeader(getToolkit(), null);
|
||||||
|
if ( priority == null || rodNames.size() != priority.size() )
|
||||||
|
throw new StingException("A complete priority list must be provided when annotateUnion is provided");
|
||||||
|
|
||||||
|
if ( ! rodNames.containsAll(rodNames) )
|
||||||
|
throw new StingException("Not all priority elements provided as input RODs: " + PRIORITY_STRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
// get all of the vcf rods at this locus
|
||||||
|
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||||
|
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true);
|
||||||
|
if ( mergedVC != null ) // only operate at the start of events
|
||||||
|
if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed
|
||||||
|
vcfWriter.add(mergedVC, ref.getBases());
|
||||||
|
else
|
||||||
|
logger.info(String.format("Ignoring complex event: " + mergedVC));
|
||||||
|
|
||||||
|
return vcs.isEmpty() ? 0 : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer counter, Integer sum) {
|
||||||
|
return counter + sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer sum) {
|
||||||
|
if ( vcfWriter != null )
|
||||||
|
vcfWriter.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.vcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broad.tribble.vcf.VCFRecord;
|
||||||
|
import org.broad.tribble.vcf.VCFCodec;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters a lifted-over VCF file for ref bases that have been changed.
|
||||||
|
*/
|
||||||
|
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
||||||
|
public class FilterLiftedVCF extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
private VCFWriter writer;
|
||||||
|
|
||||||
|
private long failedLocs = 0, totalLocs = 0;
|
||||||
|
|
||||||
|
public void initialize() {}
|
||||||
|
|
||||||
|
private void filterAndWrite(char ref, VCFRecord record) {
|
||||||
|
|
||||||
|
totalLocs++;
|
||||||
|
|
||||||
|
char recordRef = record.getReference().charAt(0);
|
||||||
|
|
||||||
|
if ( recordRef != ref ) {
|
||||||
|
failedLocs++;
|
||||||
|
} else {
|
||||||
|
if ( writer == null ) {
|
||||||
|
writer = new VCFWriter(out);
|
||||||
|
writer.writeHeader(record.getHeader());
|
||||||
|
}
|
||||||
|
writer.addRecord(record);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( tracker == null )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
List<Object> rods = tracker.getReferenceMetaData("vcf");
|
||||||
|
|
||||||
|
for ( Object rod : rods )
|
||||||
|
filterAndWrite((char)ref.getBase(), (VCFRecord)rod);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) { return 0; }
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
if ( writer != null )
|
||||||
|
writer.close();
|
||||||
|
System.out.println("Filtered " + failedLocs + " records out of " + totalLocs + " total records.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.vcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broad.tribble.vcf.VCFRecord;
|
||||||
|
import org.broad.tribble.vcf.VCFCodec;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import net.sf.picard.liftover.LiftOver;
|
||||||
|
import net.sf.picard.util.Interval;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted.
|
||||||
|
*/
|
||||||
|
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
||||||
|
public class LiftoverVCF extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
@Argument(fullName="chain", shortName="chain", doc="Chain file", required=true)
|
||||||
|
protected File CHAIN = null;
|
||||||
|
|
||||||
|
@Argument(fullName="newSequenceDictionary", shortName="dict", doc="Sequence .dict file for the new build", required=true)
|
||||||
|
protected File NEW_SEQ_DICT = null;
|
||||||
|
|
||||||
|
private VCFWriter writer;
|
||||||
|
|
||||||
|
private LiftOver liftOver;
|
||||||
|
|
||||||
|
private long successfulIntervals = 0, failedIntervals = 0;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
liftOver = new LiftOver(CHAIN);
|
||||||
|
liftOver.setLiftOverMinMatch(LiftOver.DEFAULT_LIFTOVER_MINMATCH);
|
||||||
|
|
||||||
|
final SAMFileHeader toHeader = new SAMFileReader(NEW_SEQ_DICT).getFileHeader();
|
||||||
|
liftOver.validateToSequences(toHeader.getSequenceDictionary());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void convertAndWrite(VCFRecord record) {
|
||||||
|
|
||||||
|
final Interval fromInterval = new Interval(record.getChr(), record.getStart(), record.getEnd());
|
||||||
|
final Interval toInterval = liftOver.liftOver(fromInterval);
|
||||||
|
|
||||||
|
if ( toInterval != null ) {
|
||||||
|
record.setLocation(toInterval.getSequence(), toInterval.getStart());
|
||||||
|
if ( writer == null ) {
|
||||||
|
writer = new VCFWriter(out);
|
||||||
|
writer.writeHeader(record.getHeader());
|
||||||
|
}
|
||||||
|
writer.addRecord(record);
|
||||||
|
successfulIntervals++;
|
||||||
|
} else {
|
||||||
|
failedIntervals++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( tracker == null )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
List<Object> rods = tracker.getReferenceMetaData("vcf");
|
||||||
|
|
||||||
|
for ( Object rod : rods )
|
||||||
|
convertAndWrite((VCFRecord)rod);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) { return 0; }
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
if ( writer != null )
|
||||||
|
writer.close();
|
||||||
|
System.out.println("Converted " + successfulIntervals + " records; failed to convert " + failedIntervals + " records.");
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue