VariantFilteration now accepts a VCF rod in addition to an input geli. It will then annotate this VCF file with filtering information in the INFO field too. --OnlyAnnotate will not write in filtering output
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2008 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f9819d5f13
commit
d316cbad4c
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.contexts;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -37,19 +38,22 @@ public class VariantContext {
|
||||||
private ReferenceContext ref;
|
private ReferenceContext ref;
|
||||||
private AlignmentContext context;
|
private AlignmentContext context;
|
||||||
private RodGeliText variant;
|
private RodGeliText variant;
|
||||||
|
private RodVCF variantVCF;
|
||||||
private AlignmentContext Q0freeContext = null;
|
private AlignmentContext Q0freeContext = null;
|
||||||
|
|
||||||
public VariantContext(RefMetaDataTracker tracker, ReferenceContext ref,
|
public VariantContext(RefMetaDataTracker tracker, ReferenceContext ref,
|
||||||
AlignmentContext context, RodGeliText variant) {
|
AlignmentContext context, RodGeliText variant, RodVCF variantVCF) {
|
||||||
this.tracker = tracker;
|
this.tracker = tracker;
|
||||||
this.ref = ref;
|
this.ref = ref;
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.variant = variant;
|
this.variant = variant;
|
||||||
|
this.variantVCF = variantVCF;
|
||||||
}
|
}
|
||||||
|
|
||||||
public RefMetaDataTracker getTracker() { return tracker; }
|
public RefMetaDataTracker getTracker() { return tracker; }
|
||||||
public ReferenceContext getReferenceContext() { return ref; }
|
public ReferenceContext getReferenceContext() { return ref; }
|
||||||
public RodGeliText getVariant() { return variant; }
|
public RodGeliText getVariant() { return variant; }
|
||||||
|
public RodVCF getVariantVCF() { return variantVCF; }
|
||||||
public AlignmentContext getAlignmentContext(boolean useMQ0Reads) {
|
public AlignmentContext getAlignmentContext(boolean useMQ0Reads) {
|
||||||
return (useMQ0Reads ? context : getQ0freeContext());
|
return (useMQ0Reads ? context : getQ0freeContext());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -80,4 +80,7 @@ public class VECAlleleBalance extends RatioFilter {
|
||||||
return "AlleleBalance";
|
return "AlleleBalance";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%.2f", ratio);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -60,5 +60,9 @@ public class VECClusteredSnps implements VariantExclusionCriterion {
|
||||||
return "ClusteredSnp";
|
return "ClusteredSnp";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%d", distance);
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
}
|
}
|
||||||
|
|
@ -50,5 +50,10 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
||||||
return "DoC";
|
return "DoC";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%d", depth);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,10 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
return "strand";
|
return "strand";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%.4f",pValue);
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) {
|
public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) {
|
||||||
|
|
|
||||||
|
|
@ -119,4 +119,8 @@ public class VECHomopolymer implements VariantExclusionCriterion {
|
||||||
public String getVCFFilterString() {
|
public String getVCFFilterString() {
|
||||||
return "homopolymer";
|
return "homopolymer";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return exclude ? "1" : "0";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -59,5 +59,9 @@ public class VECIndelArtifact implements VariantExclusionCriterion {
|
||||||
return "InDel";
|
return "InDel";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return exclude ? "1" : "0";
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
}
|
}
|
||||||
|
|
@ -38,4 +38,8 @@ public class VECLodThreshold implements VariantExclusionCriterion {
|
||||||
public String getVCFFilterString() {
|
public String getVCFFilterString() {
|
||||||
return "LOD";
|
return "LOD";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%.2f",lod);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,10 @@ public class VECLodThresholdByCoverage implements VariantExclusionCriterion {
|
||||||
return "LODbyDoC";
|
return "LODbyDoC";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return exclude ? "1" : "0";
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() {
|
public boolean useZeroQualityReads() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,5 +45,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
|
||||||
return "MQ";
|
return "MQ";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%.2f", rms);
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return true; }
|
public boolean useZeroQualityReads() { return true; }
|
||||||
}
|
}
|
||||||
|
|
@ -45,5 +45,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
|
||||||
return "MQzero";
|
return "MQzero";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%d", mq0Count);
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return true; }
|
public boolean useZeroQualityReads() { return true; }
|
||||||
}
|
}
|
||||||
|
|
@ -24,6 +24,10 @@ public class VECNull implements VariantExclusionCriterion {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() {
|
public boolean useZeroQualityReads() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -75,4 +75,8 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||||
public String getVCFFilterString() {
|
public String getVCFFilterString() {
|
||||||
return "onOffGenotype";
|
return "onOffGenotype";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getScoreString() {
|
||||||
|
return String.format("%.2f", ratio);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ public interface VariantExclusionCriterion {
|
||||||
public String getStudyHeader();
|
public String getStudyHeader();
|
||||||
public String getStudyInfo();
|
public String getStudyInfo();
|
||||||
public String getVCFFilterString();
|
public String getVCFFilterString();
|
||||||
|
public String getScoreString();
|
||||||
|
|
||||||
public boolean useZeroQualityReads();
|
public boolean useZeroQualityReads();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import java.io.*;
|
||||||
* variant outright. At the moment, the variants are expected to be in gelitext format.
|
* variant outright. At the moment, the variants are expected to be in gelitext format.
|
||||||
*/
|
*/
|
||||||
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class))
|
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class))
|
||||||
|
//@Allows(value={DataSource.READS, DataSource.REFERENCE}, referenceMetaData={@RMD(name="variant",type= RodGeliText.class), @RMD(name="variantvcf",type= RodVCF.class), @RMD(name="dbsnp",type=rodDbSNP.class), @RMD(name="interval",type= IntervalRod.class)})
|
||||||
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) public File VCF_OUT;
|
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) public File VCF_OUT;
|
||||||
@Argument(fullName="sampleName", shortName="sample", doc="Temporary hack to get VCF to work: the sample (NA-ID) corresponding to the variants", required=true) public String sampleName;
|
@Argument(fullName="sampleName", shortName="sample", doc="Temporary hack to get VCF to work: the sample (NA-ID) corresponding to the variants", required=true) public String sampleName;
|
||||||
|
|
@ -31,6 +32,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
|
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
|
||||||
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
@Argument(fullName="verbose", shortName="V", doc="Show how the variant likelihoods are changing with the application of each feature") public Boolean VERBOSE = false;
|
||||||
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
|
@Argument(fullName="list", shortName="ls", doc="List the available features and exclusion criteria and exit") public Boolean LIST = false;
|
||||||
|
@Argument(fullName="onlyAnnotate", shortName="oa", doc="If provided, do not write output into the FILTER field, but only annotate the VCF") public boolean onlyAnnotate = false;
|
||||||
|
|
||||||
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
||||||
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
||||||
|
|
@ -220,7 +222,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
if ( variant == null || BaseUtils.simpleBaseToBaseIndex(ref.getBase()) == -1 )
|
if ( variant == null || BaseUtils.simpleBaseToBaseIndex(ref.getBase()) == -1 )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
VariantContext varContext = new VariantContext(tracker, ref, context, variant);
|
RodVCF variantVCF = (RodVCF) tracker.lookup("variantVCF", null);
|
||||||
|
VariantContext varContext = new VariantContext(tracker, ref, context, variant, variantVCF);
|
||||||
|
|
||||||
// if we're still initializing the context, do so
|
// if we're still initializing the context, do so
|
||||||
if ( windowInitializer != null ) {
|
if ( windowInitializer != null ) {
|
||||||
|
|
@ -242,22 +245,20 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
VariantContext context = variantContextWindow.getContext();
|
VariantContext context = variantContextWindow.getContext();
|
||||||
if ( context == null )
|
if ( context == null )
|
||||||
return;
|
return;
|
||||||
RodGeliText variant = context.getVariant();
|
|
||||||
|
|
||||||
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
|
HashMap<String, Double> exclusionResults = new HashMap<String, Double>();
|
||||||
|
RodGeliText variant = context.getVariant();
|
||||||
|
GenomeLoc loc = context.getAlignmentContext(true).getLocation();
|
||||||
|
|
||||||
if (VERBOSE) { out.println("Original:\n" + variant); }
|
if (VERBOSE) { out.println("Original:\n" + variant); }
|
||||||
|
|
||||||
GenomeLoc loc = context.getAlignmentContext(true).getLocation();
|
|
||||||
if ( annotatedWriter != null )
|
if ( annotatedWriter != null )
|
||||||
annotatedWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
annotatedWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
||||||
|
|
||||||
// Apply features that modify the likelihoods and LOD scores
|
// Apply features that modify the likelihoods and LOD scores
|
||||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
||||||
ivf.compute(variantContextWindow);
|
ivf.compute(variantContextWindow);
|
||||||
|
|
||||||
double[] weights = ivf.getLikelihoods();
|
double[] weights = ivf.getLikelihoods();
|
||||||
|
|
||||||
variant.adjustLikelihoods(weights);
|
variant.adjustLikelihoods(weights);
|
||||||
|
|
||||||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
||||||
|
|
@ -273,6 +274,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
// Use the filters to score the variant
|
// Use the filters to score the variant
|
||||||
String filterFailureString = "";
|
String filterFailureString = "";
|
||||||
|
HashMap<String,String> filterINFOString = new HashMap<String,String>();
|
||||||
double jointInclusionProbability = 1.0;
|
double jointInclusionProbability = 1.0;
|
||||||
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
||||||
vec.compute(variantContextWindow);
|
vec.compute(variantContextWindow);
|
||||||
|
|
@ -286,6 +288,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
||||||
filterFailureString += vec.getVCFFilterString() + ";";
|
filterFailureString += vec.getVCFFilterString() + ";";
|
||||||
}
|
}
|
||||||
|
filterINFOString.put(vec.getVCFFilterString(), vec.getScoreString());
|
||||||
|
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
||||||
|
|
@ -298,14 +301,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
// Decide whether we should keep the call or not
|
// Decide whether we should keep the call or not
|
||||||
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
||||||
includedWriter.println(variant);
|
includedWriter.println(variant);
|
||||||
|
|
||||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
|
||||||
} else {
|
|
||||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:excluded\n"); }
|
|
||||||
}
|
}
|
||||||
|
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
||||||
|
|
||||||
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
|
|
||||||
if ( annotatedWriter != null ) {
|
if ( annotatedWriter != null ) {
|
||||||
|
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
|
||||||
if ( dbsnp == null )
|
if ( dbsnp == null )
|
||||||
annotatedWriter.print("false\tfalse\t");
|
annotatedWriter.print("false\tfalse\t");
|
||||||
else
|
else
|
||||||
|
|
@ -313,16 +313,31 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
annotatedWriter.println(GenotypeUtils.isHet(variant));
|
annotatedWriter.println(GenotypeUtils.isHet(variant));
|
||||||
}
|
}
|
||||||
|
|
||||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
writeVCF(context, filterFailureString, filterINFOString);
|
||||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
}
|
||||||
VCFRecord rec = VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false);
|
|
||||||
if ( rec != null) {
|
|
||||||
if ( !filterFailureString.equals("") )
|
private void writeVCF(VariantContext context, final String filterFailureString, HashMap<String,String> filterINFOString) {
|
||||||
|
VCFRecord rec = getVCFRecord(context);
|
||||||
|
if ( rec != null ) {
|
||||||
|
rec.addInfoFields(filterINFOString);
|
||||||
|
if ( ! onlyAnnotate && !filterFailureString.equals("") )
|
||||||
rec.setFilterString(filterFailureString);
|
rec.setFilterString(filterFailureString);
|
||||||
vcfWriter.addRecord(rec);
|
vcfWriter.addRecord(rec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private VCFRecord getVCFRecord(VariantContext context) {
|
||||||
|
if ( context.getVariantVCF() != null ) {
|
||||||
|
return context.getVariantVCF().mCurrentRecord;
|
||||||
|
} else {
|
||||||
|
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||||
|
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||||
|
return VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Increment the number of loci processed.
|
* Increment the number of loci processed.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -8,63 +8,63 @@ import java.util.Arrays;
|
||||||
public class VariantFiltrationIntegrationTest extends WalkerTest {
|
public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testIntervals() {
|
public void testIntervals() {
|
||||||
String[] md5DoC = {"c0a7e2fc07d565e633b3064f9f3cdaf5", "21c8e1f9dc65fdfb39347547f9b04011"};
|
String[] md5DoC = {"d11547059193b90e2fd1eb47dac18999", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5DoC));
|
Arrays.asList(md5DoC));
|
||||||
executeTest("testDoCFilter", spec1);
|
executeTest("testDoCFilter", spec1);
|
||||||
|
|
||||||
String[] md5AlleleBalance = {"aa0f7800cfd346236620ae0eac220817", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
String[] md5AlleleBalance = {"409f2b7fb193919d28c20be4dc5c4516", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5AlleleBalance));
|
Arrays.asList(md5AlleleBalance));
|
||||||
executeTest("testAlleleBalanceFilter", spec2);
|
executeTest("testAlleleBalanceFilter", spec2);
|
||||||
|
|
||||||
String[] md5Strand = {"9f430f251dbeb58a2f80a1306a5dd492", "0f7db0aad764268ee8fa3b857df8d87d"};
|
String[] md5Strand = {"e5806925867088edeee9f6276ab030f3", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5Strand));
|
Arrays.asList(md5Strand));
|
||||||
executeTest("testStrandFilter", spec3);
|
executeTest("testStrandFilter", spec3);
|
||||||
|
|
||||||
String[] md5Lod = {"56177258c0b3944c043f86faee4b42ae", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
String[] md5Lod = {"2c93f56e07e320ac7a012bd4940b1dbc", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||||
WalkerTestSpec spec4 = new WalkerTestSpec(
|
WalkerTestSpec spec4 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5Lod));
|
Arrays.asList(md5Lod));
|
||||||
executeTest("testLodFilter", spec4);
|
executeTest("testLodFilter", spec4);
|
||||||
|
|
||||||
String[] md5MQ0 = {"0e303c32f5c1503f4c875771f28fc46c", "3203de335621851bccf596242b079e23"};
|
String[] md5MQ0 = {"fde4be5eec12be3405863649170aaaac", "3203de335621851bccf596242b079e23"};
|
||||||
WalkerTestSpec spec5 = new WalkerTestSpec(
|
WalkerTestSpec spec5 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5MQ0));
|
Arrays.asList(md5MQ0));
|
||||||
executeTest("testMappingQuality0Filter", spec5);
|
executeTest("testMappingQuality0Filter", spec5);
|
||||||
|
|
||||||
String[] md5MQ = {"946462a6199e9453784e0942e18e6830", "ecc777feedea61f7b570d114c2ab89b1"};
|
String[] md5MQ = {"17c35a5ccd814991dbcd3be6c2f244cd", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||||
WalkerTestSpec spec6 = new WalkerTestSpec(
|
WalkerTestSpec spec6 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5MQ));
|
Arrays.asList(md5MQ));
|
||||||
executeTest("testRMSMappingQualityFilter", spec6);
|
executeTest("testRMSMappingQualityFilter", spec6);
|
||||||
|
|
||||||
String[] md5OnOff = {"2ff84e104ce73e347e55d272170b4d03", "67f2e1bc025833b0fa31f47195198997"};
|
String[] md5OnOff = {"fc649a7ebd4a7b7fa739fc4fcb7704e2", "67f2e1bc025833b0fa31f47195198997"};
|
||||||
WalkerTestSpec spec7 = new WalkerTestSpec(
|
WalkerTestSpec spec7 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5OnOff));
|
Arrays.asList(md5OnOff));
|
||||||
executeTest("testOnOffGenotypeFilter", spec7);
|
executeTest("testOnOffGenotypeFilter", spec7);
|
||||||
|
|
||||||
String[] md5Clusters = {"e6a1c088678b1c31ff340ebd622b476e", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
String[] md5Clusters = {"9b3aede689a25431ed2e1aac9dd73dc6", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||||
WalkerTestSpec spec8 = new WalkerTestSpec(
|
WalkerTestSpec spec8 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5Clusters));
|
Arrays.asList(md5Clusters));
|
||||||
executeTest("testClusteredSnpsFilter", spec8);
|
executeTest("testClusteredSnpsFilter", spec8);
|
||||||
|
|
||||||
String[] md5Indels = {"82e555b76c12474154f8e5e402516d73", "8e0e915a1cb63d7049e0671ed00101fe"};
|
String[] md5Indels = {"c36cb0021f90b3509e483b57f4f41f6e", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||||
WalkerTestSpec spec9 = new WalkerTestSpec(
|
WalkerTestSpec spec9 = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||||
2,
|
2,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue