Moving over to 4.0 and away from VCFRecord
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3778 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d896d03554
commit
7e7da75d27
|
|
@ -1,178 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
|
||||||
|
|
||||||
import org.broad.tribble.vcf.*;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts subsets of a VCF file like one or more samples, all or only variant loci, all or filtered loci.
|
|
||||||
*/
|
|
||||||
public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter> {
|
|
||||||
@Argument(fullName="sample", shortName="SN", doc="Sample to include (or nothing to specify all samples)", required=false)
|
|
||||||
private HashSet<String> SAMPLES;
|
|
||||||
|
|
||||||
@Argument(fullName="vcfsubset", shortName="O", doc="File to write VCF subset to", required=false)
|
|
||||||
private File VPATH = null;
|
|
||||||
|
|
||||||
@Argument(fullName="includeNonVariants", shortName="INV", doc="Include non-variant loci", required=false)
|
|
||||||
private boolean INCLUDE_NON_VARIANTS = false;
|
|
||||||
|
|
||||||
@Argument(fullName="includeFiltered", shortName="IF", doc="Include filtered loci", required=false)
|
|
||||||
private boolean INCLUDE_FILTERED = false;
|
|
||||||
|
|
||||||
private VCFHeader vheader = null;
|
|
||||||
private VCFWriter vwriter = null;
|
|
||||||
|
|
||||||
public void initializeWriter() {
|
|
||||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
|
||||||
metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
|
||||||
metaData.add(new VCFHeaderLine("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()));
|
|
||||||
|
|
||||||
Set<String> additionalColumns = new HashSet<String>();
|
|
||||||
additionalColumns.add("FORMAT");
|
|
||||||
additionalColumns.addAll(SAMPLES);
|
|
||||||
|
|
||||||
vheader = new VCFHeader(metaData, additionalColumns);
|
|
||||||
if (VPATH != null) {
|
|
||||||
vwriter = new VCFWriter(VPATH);
|
|
||||||
vwriter.writeHeader(vheader);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public ArrayList<VCFRecord> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
ArrayList<VCFRecord> records = new ArrayList<VCFRecord>();
|
|
||||||
|
|
||||||
if (tracker != null) {
|
|
||||||
for (GATKFeature feature : tracker.getAllRods()) {
|
|
||||||
Object rod = feature.getUnderlyingObject();
|
|
||||||
if (rod instanceof VCFRecord) {
|
|
||||||
VCFRecord vcfrod = (VCFRecord) rod;
|
|
||||||
|
|
||||||
if (SAMPLES == null) {
|
|
||||||
SAMPLES = new HashSet<String>();
|
|
||||||
SAMPLES.addAll(vcfrod.getHeader().getGenotypeSamples());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (VPATH != null && vwriter == null) {
|
|
||||||
initializeWriter();
|
|
||||||
}
|
|
||||||
|
|
||||||
//out.println(record.toStringEncoding(vcfrod.getHeader()));
|
|
||||||
|
|
||||||
records.add(vcfrod);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return records;
|
|
||||||
}
|
|
||||||
|
|
||||||
public VCFWriter reduceInit() {
|
|
||||||
return vwriter;
|
|
||||||
}
|
|
||||||
|
|
||||||
private VCFRecord subsetRecord(VCFRecord record) {
|
|
||||||
ArrayList<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
|
||||||
HashSet<VCFGenotypeEncoding> genotypeEncodingSet = new HashSet<VCFGenotypeEncoding>();
|
|
||||||
|
|
||||||
for ( VCFGenotypeRecord gr : record.getVCFGenotypeRecords() ) {
|
|
||||||
if (SAMPLES.contains(gr.getSampleName())) {
|
|
||||||
genotypeRecords.add(gr);
|
|
||||||
|
|
||||||
for (VCFGenotypeEncoding allele : gr.getAlleles()) {
|
|
||||||
if (!allele.getBases().equalsIgnoreCase(record.getReference())) {
|
|
||||||
genotypeEncodingSet.add(allele);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ArrayList<VCFGenotypeEncoding> genotypeEncodings = new ArrayList<VCFGenotypeEncoding>();
|
|
||||||
for (VCFGenotypeEncoding allele : genotypeEncodingSet) {
|
|
||||||
if (!allele.getBases().equalsIgnoreCase(".")) {
|
|
||||||
genotypeEncodings.add(allele);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
VCFRecord subset = new VCFRecord(record.getReference(),
|
|
||||||
record.getChr(),
|
|
||||||
record.getStart(),
|
|
||||||
record.getID(),
|
|
||||||
genotypeEncodings,
|
|
||||||
record.getQual(),
|
|
||||||
record.getFilterString(),
|
|
||||||
record.getInfoValues(),
|
|
||||||
record.getGenotypeFormatString(),
|
|
||||||
genotypeRecords);
|
|
||||||
|
|
||||||
return subset;
|
|
||||||
}
|
|
||||||
|
|
||||||
public VCFWriter reduce(ArrayList<VCFRecord> records, VCFWriter writer) {
|
|
||||||
for (VCFRecord record : records) {
|
|
||||||
VCFRecord subset = subsetRecord(record);
|
|
||||||
|
|
||||||
boolean isVariant = false;
|
|
||||||
|
|
||||||
if (subset.getVCFGenotypeRecords().size() > 0) {
|
|
||||||
for ( VCFGenotypeEncoding ge : subset.getVCFGenotypeRecords().get(0).getAlleles() ) {
|
|
||||||
if (!record.getReference().equals(ge.getBases())) {
|
|
||||||
isVariant = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((isVariant || INCLUDE_NON_VARIANTS) && (!subset.isFiltered() || INCLUDE_FILTERED)) {
|
|
||||||
if (vwriter != null) {
|
|
||||||
vwriter.addRecord(subset);
|
|
||||||
} else {
|
|
||||||
out.println(subset.toStringEncoding(vheader));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return writer;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void onTraversalDone(VCFWriter writer) {
|
|
||||||
if (vwriter != null) {
|
|
||||||
vwriter.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010 The Broad Institute
|
* Copyright (c) 2010.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* obtaining a copy of this software and associated documentation
|
* obtaining a copy of this software and associated documentation
|
||||||
|
|
@ -28,25 +28,30 @@ package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||||
import org.apache.commons.jexl2.Expression;
|
import org.apache.commons.jexl2.Expression;
|
||||||
import org.apache.commons.jexl2.JexlContext;
|
import org.apache.commons.jexl2.JexlContext;
|
||||||
import org.apache.commons.jexl2.MapContext;
|
import org.apache.commons.jexl2.MapContext;
|
||||||
import org.broad.tribble.vcf.*;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
|
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||||
|
import org.broad.tribble.vcf.VCFFilterHeaderLine;
|
||||||
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria. [TODO -- update to new walker style]
|
* Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria. [TODO -- update to new walker style]
|
||||||
*/
|
*/
|
||||||
@Requires(value={},referenceMetaData=@RMD(name="variant",type= VCFRecord.class))
|
@Requires(value={},referenceMetaData=@RMD(name="variant", type= ReferenceOrderedDatum.class))
|
||||||
public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
public class VariantSelect extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="match", shortName="match", doc="Expression used with INFO fields to select VCF records for inclusion in the output VCF(see wiki docs for more info)", required=false)
|
@Argument(fullName="match", shortName="match", doc="Expression used with INFO fields to select VCF records for inclusion in the output VCF(see wiki docs for more info)", required=false)
|
||||||
protected String[] MATCH_STRINGS = new String[]{null};
|
protected String[] MATCH_STRINGS = new String[]{null};
|
||||||
|
|
||||||
|
|
@ -66,21 +71,6 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
private List<MatchExp> matchExpressions = new ArrayList<MatchExp>();
|
private List<MatchExp> matchExpressions = new ArrayList<MatchExp>();
|
||||||
|
|
||||||
private void initializeVcfWriter(VCFRecord record) {
|
|
||||||
// setup the header fields
|
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
|
||||||
hInfo.add(new VCFHeaderLine("source", "VariantSelect"));
|
|
||||||
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
|
||||||
|
|
||||||
for ( MatchExp exp : matchExpressions ) {
|
|
||||||
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
|
|
||||||
}
|
|
||||||
|
|
||||||
writer = new VCFWriter(out);
|
|
||||||
writer.writeHeader(new VCFHeader(hInfo, record.getHeader().getGenotypeSamples()));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
for ( int i = 0; i < MATCH_STRINGS.length; i++ ) {
|
for ( int i = 0; i < MATCH_STRINGS.length; i++ ) {
|
||||||
if ( MATCH_STRINGS[i] != null ) {
|
if ( MATCH_STRINGS[i] != null ) {
|
||||||
|
|
@ -92,6 +82,22 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setup the header fields
|
||||||
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||||
|
hInfo.add(new VCFHeaderLine("source", "VariantSelect"));
|
||||||
|
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||||
|
|
||||||
|
for ( MatchExp exp : matchExpressions ) {
|
||||||
|
hInfo.add(new VCFFilterHeaderLine(exp.name, exp.expStr));
|
||||||
|
}
|
||||||
|
|
||||||
|
writer = new VCFWriter(out, true);
|
||||||
|
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||||
|
|
||||||
|
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||||
|
writer.writeHeader(vcfHeader);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
@ -108,15 +114,15 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
||||||
if ( tracker == null )
|
if ( tracker == null )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
VCFRecord variant = tracker.lookup("variant",VCFRecord.class);
|
VariantContext vc = tracker.getVariantContext(ref, "variant", null, context.getLocation(), false);
|
||||||
// ignore places where we don't have a variant
|
// ignore places where we don't have a variant
|
||||||
if ( variant == null )
|
if ( vc == null )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
boolean someoneMatched = false;
|
boolean someoneMatched = false;
|
||||||
for ( MatchExp exp : matchExpressions ) {
|
for ( MatchExp exp : matchExpressions ) {
|
||||||
Map<String, Object> infoMap = new HashMap<String, Object>(variant.getInfoValues());
|
Map<String, Object> infoMap = new HashMap<String, Object>(vc.getAttributes());
|
||||||
infoMap.put("QUAL", String.valueOf(variant.getQual()));
|
infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));
|
||||||
|
|
||||||
JexlContext jContext = new MapContext(infoMap);
|
JexlContext jContext = new MapContext(infoMap);
|
||||||
|
|
||||||
|
|
@ -133,29 +139,16 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( someoneMatched )
|
if ( someoneMatched )
|
||||||
writeVCF(variant);
|
writer.add(vc, new byte[]{ref.getBase()});
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeVCF(VCFRecord variant) {
|
|
||||||
if ( writer == null )
|
|
||||||
initializeVcfWriter(variant);
|
|
||||||
|
|
||||||
writer.addRecord(variant);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
return sum + value;
|
return sum + value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Tell the user the number of loci processed and close out the new variants file.
|
|
||||||
*
|
|
||||||
* @param result the number of loci seen.
|
|
||||||
*/
|
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
if ( writer != null )
|
writer.close();
|
||||||
writer.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||||
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
|
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts subsets of a VCF file like one or more samples, all or only variant loci, all or filtered loci.
|
||||||
|
*/
|
||||||
|
public class VariantSubset extends RodWalker<Integer, Integer> {
|
||||||
|
@Argument(fullName="sample", shortName="SN", doc="Sample to include (or nothing to specify all samples)", required=false)
|
||||||
|
private ArrayList<String> SAMPLES = null;
|
||||||
|
|
||||||
|
@Argument(fullName="includeNonVariants", shortName="INV", doc="Include non-variant loci", required=false)
|
||||||
|
private boolean INCLUDE_NON_VARIANTS = false;
|
||||||
|
|
||||||
|
@Argument(fullName="includeFiltered", shortName="IF", doc="Include filtered loci", required=false)
|
||||||
|
private boolean INCLUDE_FILTERED = false;
|
||||||
|
|
||||||
|
private VCFWriter writer;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||||
|
metaData.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||||
|
metaData.add(new VCFHeaderLine("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()));
|
||||||
|
|
||||||
|
writer = new VCFWriter(out, true);
|
||||||
|
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||||
|
|
||||||
|
final VCFHeader vcfHeader = new VCFHeader(metaData, samples);
|
||||||
|
writer.writeHeader(vcfHeader);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
Collection<VariantContext> VCs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false);
|
||||||
|
for (VariantContext vc : VCs) {
|
||||||
|
VariantContext subset = subsetRecord(vc);
|
||||||
|
|
||||||
|
if ( (vc.isPolymorphic() || INCLUDE_NON_VARIANTS) &&
|
||||||
|
(!subset.isFiltered() || INCLUDE_FILTERED) )
|
||||||
|
writer.add(subset, new byte[]{ref.getBase()});
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext subsetRecord(VariantContext vc) {
|
||||||
|
if ( SAMPLES == null || SAMPLES.isEmpty() )
|
||||||
|
return vc;
|
||||||
|
|
||||||
|
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||||
|
for ( Map.Entry<String, Genotype> genotypePair : vc.getGenotypes().entrySet() ) {
|
||||||
|
if ( SAMPLES.contains(genotypePair.getKey()) )
|
||||||
|
genotypes.add(genotypePair.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
return vc.subContextFromGenotypes(genotypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer sum, Integer value) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer sum) {
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
public class VCFSelectIntegrationTest extends WalkerTest {
|
|
||||||
|
|
||||||
public static String baseTestString() {
|
|
||||||
return "-T VCFSelect -o %s -R " + oneKGLocation + "reference/human_b36_both.fasta";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testVCFSelect1() {
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
|
|
||||||
Arrays.asList("b49ba344471444077bc6fe3c17e7bc3f"));
|
|
||||||
executeTest("testVCFSelect1", spec);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testVCFSelect2() {
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -L 1:10001290-10048590 ", 1,
|
|
||||||
Arrays.asList("517b4ae7058c3125ad6846c33a1a2e57"));
|
|
||||||
executeTest("testVCFSelect2", spec);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testVCFSelectOr() {
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
|
|
||||||
Arrays.asList("d77d8f938a61abd60fc813ff1a06bb0c"));
|
|
||||||
executeTest("testVCFSelectOr", spec);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testVCFSelectAnd() {
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6 && AF == 0.50' -L 1:10001290-10048590 ", 1,
|
|
||||||
Arrays.asList("ef05fc766482ffade95f1bbdb777770d"));
|
|
||||||
executeTest("testVCFSelectAnd", spec);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,71 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class VariantSelectIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
|
public static String baseTestString() {
|
||||||
|
return "-T VariantSelect -o %s -R " + oneKGLocation + "reference/human_b36_both.fasta";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVCFSelect1() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
|
||||||
|
Arrays.asList("8b358e0cfa35de022a37360a6f28a839"));
|
||||||
|
executeTest("testVCFSelect1", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVCFSelect2() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -L 1:10001290-10048590 ", 1,
|
||||||
|
Arrays.asList("8e991b9d6d610c8f89c8557756fc8e34"));
|
||||||
|
executeTest("testVCFSelect2", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVCFSelectOr() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
|
||||||
|
Arrays.asList("7bd064c8d8bf5389fcd0b78a7c32b599"));
|
||||||
|
executeTest("testVCFSelectOr", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVCFSelectAnd() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6 && AF == 0.50' -L 1:10001290-10048590 ", 1,
|
||||||
|
Arrays.asList("5af565836fa926feaa130715b93188bc"));
|
||||||
|
executeTest("testVCFSelectAnd", spec);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue