changes to add VCF to the rod system, fix VCF output in VariantsToVCF, and some other minor changes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1715 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8349004414
commit
d262cbd41c
|
|
@ -75,6 +75,8 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
addModule("Intervals", IntervalRod.class);
|
||||
addModule("Variants", RodGeliText.class);
|
||||
addModule("GLF", RodGLF.class);
|
||||
addModule("VCF", RodVCF.class);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -256,7 +256,21 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
*/
|
||||
@Override
|
||||
public Genotype getCalledGenotype() {
|
||||
throw new UnsupportedOperationException("We don't support this right now");
|
||||
double refQual = (this.getNegLog10PError());
|
||||
|
||||
if (this.mCurrentRecord != null && this.mCurrentRecord.hasGenotypeData()) {
|
||||
List<VCFGenotypeRecord> lst = this.mCurrentRecord.getVCFGenotypeRecords();
|
||||
if (lst.size() != 1) {
|
||||
throw new IllegalStateException("VCF object does not have one and only one genotype record");
|
||||
}
|
||||
double qual = 0;
|
||||
if (lst.get(0).getAlleles().equals(this.getReference()))
|
||||
qual = refQual;
|
||||
else if (lst.get(0).getFields().containsKey("GQ"))
|
||||
qual = Double.valueOf(lst.get(0).getFields().get("GQ")) / 10.0;
|
||||
return new BasicGenotype(this.getLocation(), Utils.join("", lst.get(0).getAlleles()), this.getReference().charAt(0), qual);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -315,10 +315,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
|
||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
if ( VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false) ) {
|
||||
VCFRecord rec = VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false);
|
||||
if ( rec != null) {
|
||||
if ( !filterFailureString.equals("") )
|
||||
map.put(VCFHeader.HEADER_FIELDS.FILTER, filterFailureString);
|
||||
vcfWriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt));
|
||||
rec.setFilterString(filterFailureString);
|
||||
vcfWriter.addRecord(rec);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,17 +3,24 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
|
|||
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
||||
|
|
@ -24,6 +31,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
private VCFWriter vcfwriter = null;
|
||||
private VCFHeader vcfheader = null;
|
||||
private TreeMap<String, String> sampleNames = null;
|
||||
private static String format = "GT:GQ:DP";
|
||||
|
||||
public void initialize() {
|
||||
sampleNames = new TreeMap<String, String>();
|
||||
|
|
@ -62,7 +70,9 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; }
|
||||
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
|
||||
if (rod != null && sampleNames.keySet().contains(rod.getName().toUpperCase())) {
|
||||
|
|
@ -75,73 +85,73 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
if ( generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE) ) {
|
||||
vcfwriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt));
|
||||
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
|
||||
return 1;
|
||||
Map<VCFHeader.HEADER_FIELDS, String> map = new HashMap<VCFHeader.HEADER_FIELDS, String>();
|
||||
VCFRecord rec = generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE);
|
||||
if (rec != null) {
|
||||
vcfwriter.addRecord(rec);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public static boolean generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context,
|
||||
VCFHeader vcfheader, List<VCFGenotypeRecord> gt, Map<VCFHeader.HEADER_FIELDS,String> map,
|
||||
Map<String, String> sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) {
|
||||
public static VCFRecord generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context,
|
||||
VCFHeader vcfheader, List<VCFGenotypeRecord> gt, Map<VCFHeader.HEADER_FIELDS, String> map,
|
||||
Map<String, String> sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) {
|
||||
int[] alleleCounts = new int[4];
|
||||
int numSNPs = 0;
|
||||
int numRefs = 0;
|
||||
int[] alleleNames = { 0, 1, 2, 3 };
|
||||
int[] alleleNames = {0, 1, 2, 3};
|
||||
double snpQual = 0.0;
|
||||
int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||
|
||||
List<String> alts = new ArrayList<String>();
|
||||
for (String name : vcfheader.getGenotypeSamples()) {
|
||||
ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null);
|
||||
if (rod != null) {
|
||||
AllelicVariant av = (AllelicVariant) rod;
|
||||
String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence());
|
||||
Variation av = (Variation) rod;
|
||||
String lod = String.format("%d", av.getNegLog10PError() > 99 ? 99 : (int) av.getNegLog10PError());
|
||||
int depth = 0;
|
||||
|
||||
if (rod instanceof RodGeliText) {
|
||||
RodGeliText rv = (RodGeliText) rod;
|
||||
depth = rv.depth;
|
||||
}
|
||||
|
||||
Map<String,String> str = new HashMap<String,String>();
|
||||
if (av.getGenotype().get(0).charAt(0) == av.getGenotype().get(0).charAt(1)) {
|
||||
str.put("key","1/1:" + lod + (depth > 0 ? ":" + depth : ""));
|
||||
//str.put("key","1/1");
|
||||
} else {
|
||||
str.put("key","0/1:" + lod + (depth > 0 ? ":" + depth : ""));
|
||||
//str.put("key","0/1");
|
||||
if (!(rod instanceof VariantBackedByGenotype))
|
||||
throw new IllegalArgumentException("The passed in variant type must be backed by genotype data");
|
||||
Genotype genotype = ((VariantBackedByGenotype) rod).getCalledGenotype();
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
for (char base : genotype.getBases().toCharArray()) {
|
||||
alleles.add(String.valueOf(base));
|
||||
if (base != ref.getBase() && !alts.contains(String.valueOf(base))) alts.add(String.valueOf(base));
|
||||
}
|
||||
|
||||
List<String> alleles = av.getGenotype();
|
||||
int allele1 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(0));
|
||||
int allele2 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(1));
|
||||
if (allele1 >= 0 && allele1 != refbase) {
|
||||
alleleCounts[allele1]++;
|
||||
}
|
||||
if (allele2 >= 0 && allele2 != refbase) {
|
||||
alleleCounts[allele2]++;
|
||||
}
|
||||
Map<String, String> str = new HashMap<String, String>();
|
||||
str.put("GQ", lod);
|
||||
if (depth > 0) str.put("DP", String.valueOf(depth));
|
||||
|
||||
int allele1 = BaseUtils.simpleBaseToBaseIndex(alleles.get(0).charAt(0));
|
||||
if (allele1 >= 0 && allele1 != refbase) { alleleCounts[allele1]++; }
|
||||
|
||||
int allele2 = BaseUtils.simpleBaseToBaseIndex(alleles.get(0).charAt(1));
|
||||
if (allele2 >= 0 && allele2 != refbase) { alleleCounts[allele2]++; }
|
||||
|
||||
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str ));
|
||||
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str));
|
||||
|
||||
numSNPs++;
|
||||
snpQual += av.getVariationConfidence();
|
||||
snpQual += av.getNegLog10PError();
|
||||
} else {
|
||||
Map<String,String> str = new HashMap<String,String>();
|
||||
str.put("key","0/0");
|
||||
|
||||
Map<String, String> str = new HashMap<String, String>();
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
alleles.add(ref.getBase() + "" + ref.getBase());
|
||||
alleles.add(String.valueOf(ref.getBase()));
|
||||
alleles.add(String.valueOf(ref.getBase()));
|
||||
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str));
|
||||
|
||||
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str ));
|
||||
|
||||
numRefs++;
|
||||
}
|
||||
}
|
||||
|
||||
if (numSNPs == 0)
|
||||
return false;
|
||||
return null;
|
||||
|
||||
|
||||
Integer[] perm = Utils.SortPermutation(alleleCounts);
|
||||
|
|
@ -151,61 +161,38 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
|
||||
|
||||
String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )",
|
||||
context.getLocation(),
|
||||
ref.getBase(),
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
|
||||
context.getLocation(),
|
||||
ref.getBase(),
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
|
||||
);
|
||||
|
||||
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) {
|
||||
out.println("[multistate] " + infoString);
|
||||
return false;
|
||||
return null;
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
out.println("[locus_info] " + infoString);
|
||||
}
|
||||
}
|
||||
|
||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||
map.put(field,String.valueOf(1));
|
||||
Map<String,String> info = new HashMap<String,String>();
|
||||
if (dbsnp != null) info.put("DB","1");
|
||||
if (dbsnp != null && dbsnp.isHapmap()) info.put("H2","1");
|
||||
|
||||
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
|
||||
map.put(field, context.getContig());
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
|
||||
map.put(field, String.valueOf(context.getPosition()));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
|
||||
map.put(field, String.valueOf(ref.getBases()));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
|
||||
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
|
||||
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
|
||||
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
|
||||
map.put(field, "0");
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
|
||||
String infostr = ".";
|
||||
ArrayList<String> info = new ArrayList<String>();
|
||||
return new VCFRecord(ref.getBase(),
|
||||
context.getContig(),
|
||||
(int) context.getPosition(),
|
||||
(dbsnp == null) ? "." : dbsnp.name,
|
||||
alts,
|
||||
snpQual > 99 ? 99 : (int) snpQual,
|
||||
"0",
|
||||
info,
|
||||
format,
|
||||
gt);
|
||||
|
||||
if (dbsnp != null) { info.add("DB=1"); }
|
||||
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
|
||||
|
||||
for (int i = 0; i < info.size(); i++) {
|
||||
if (i == 0) { infostr = ""; }
|
||||
|
||||
infostr += info.get(i);
|
||||
|
||||
if (i < info.size() - 1) {
|
||||
infostr += ";";
|
||||
}
|
||||
}
|
||||
|
||||
map.put(field, infostr);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ public interface Genotype {
|
|||
public double getNegLog10PError();
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
* get the bases that represent this genotype
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -42,9 +42,9 @@ public class VCFGenotypeRecord {
|
|||
*/
|
||||
public VCFGenotypeRecord(String sampleName, List<String> genotypes, PHASE phasing, Map<String, String> otherFlags) {
|
||||
this.mSampleName = sampleName;
|
||||
this.mGenotypeAlleles.addAll(genotypes);
|
||||
if (genotypes != null) this.mGenotypeAlleles.addAll(genotypes);
|
||||
this.mPhaseType = phasing;
|
||||
this.mFields.putAll(otherFlags);
|
||||
if (otherFlags != null) this.mFields.putAll(otherFlags);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -104,8 +104,9 @@ public class VCFGenotypeRecord {
|
|||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return str;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,10 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
|
|||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/** the basic VCF record type */
|
||||
public class VCFRecord {
|
||||
|
|
@ -204,9 +201,7 @@ public class VCFRecord {
|
|||
* @return an array of strings representing the filtering criteria, or null if none were applied
|
||||
*/
|
||||
public String[] getFilteringCodes() {
|
||||
//if (this.mFilterString.equals("0")) {
|
||||
// return null;
|
||||
//}
|
||||
if (mFilterString == null) return new String[]{"0"};
|
||||
return this.mFilterString.split(";");
|
||||
}
|
||||
|
||||
|
|
@ -220,6 +215,11 @@ public class VCFRecord {
|
|||
* @return a map, of the info key-value pairs
|
||||
*/
|
||||
public Map<String, String> getInfoValues() {
|
||||
if (this.mInfoFields.size() < 1) {
|
||||
Map<String,String> map = new HashMap<String, String>();
|
||||
map.put(".","");
|
||||
return map;
|
||||
}
|
||||
return this.mInfoFields;
|
||||
}
|
||||
|
||||
|
|
@ -316,7 +316,7 @@ public class VCFRecord {
|
|||
builder.append(getReferenceBase() + FIELD_SEPERATOR);
|
||||
String alts = "";
|
||||
for (String str : this.getAlternateAlleles()) alts += str + ",";
|
||||
builder.append(alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR);
|
||||
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
|
||||
builder.append(getQual() + FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
|
||||
String info = "";
|
||||
|
|
|
|||
|
|
@ -8,63 +8,63 @@ import java.util.Arrays;
|
|||
public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testIntervals() {
|
||||
String[] md5DoC = {"eada262e03738876a2b5b94d56f0951e", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||
String[] md5DoC = {"b222d15b300f989dd2a86ff1f500f64b", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5DoC));
|
||||
executeTest("testDoCFilter", spec1);
|
||||
|
||||
String[] md5AlleleBalance = {"0ade6c97a46245a65558d115ef1e3956", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||
String[] md5AlleleBalance = {"9a59d33b55e5bad0228f2d2d67d4c17d", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5AlleleBalance));
|
||||
executeTest("testAlleleBalanceFilter", spec2);
|
||||
|
||||
String[] md5Strand = {"1a9568e3f7e88483e7fbc5f981a7973c", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||
String[] md5Strand = {"b0a6fb821be2f7b26f8f6d77cbd758a9", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Strand));
|
||||
executeTest("testStrandFilter", spec3);
|
||||
|
||||
String[] md5Lod = {"64bc742d0f44a7279ac6c4ad5c28da28", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||
String[] md5Lod = {"60624843c4c8ae561acc444df565da99", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||
WalkerTestSpec spec4 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Lod));
|
||||
executeTest("testLodFilter", spec4);
|
||||
|
||||
String[] md5MQ0 = {"1201176efca694e2566a9de1508517b8", "3203de335621851bccf596242b079e23"};
|
||||
String[] md5MQ0 = {"5e3d4d6b13e79a5df5171d3e5a9f1bd7", "3203de335621851bccf596242b079e23"};
|
||||
WalkerTestSpec spec5 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ0));
|
||||
executeTest("testMappingQuality0Filter", spec5);
|
||||
|
||||
String[] md5MQ = {"ed922e58c856e1bc3f125b635dea48fc", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||
String[] md5MQ = {"fdbac9cf332dd45d9c92146157ace65f", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||
WalkerTestSpec spec6 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ));
|
||||
executeTest("testRMSMappingQualityFilter", spec6);
|
||||
|
||||
String[] md5OnOff = {"8a00c5ad80c43e468186ce0147553048", "67f2e1bc025833b0fa31f47195198997"};
|
||||
String[] md5OnOff = {"57c5a92bde03adbff9c6ca6eada033c4", "67f2e1bc025833b0fa31f47195198997"};
|
||||
WalkerTestSpec spec7 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5OnOff));
|
||||
executeTest("testOnOffGenotypeFilter", spec7);
|
||||
|
||||
String[] md5Clusters = {"ee9d8039490354cc7f2c574b5d10c7d8", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||
String[] md5Clusters = {"44223fa50dac2d9c1096558689cb8493", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||
WalkerTestSpec spec8 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Clusters));
|
||||
executeTest("testClusteredSnpsFilter", spec8);
|
||||
|
||||
String[] md5Indels = {"90eb46f8d2bf4c0a72716e35c914839d", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||
String[] md5Indels = {"0f03727ac9e6fc43311377b29d12596c", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||
WalkerTestSpec spec9 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,79 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.io.File;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VariantsToVCFIntegrationTest
|
||||
* <p/>
|
||||
* test(s) for the VariantsToVCF walker.
|
||||
*/
|
||||
public class VariantsToVCFIntegrationTest extends WalkerTest {
|
||||
|
||||
|
||||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("d1882fd8ecee6a95f561ed3be4d4a435");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
*
|
||||
* java -jar ./dist/GenomeAnalysisTK.jar \
|
||||
* -R /broad/1KG/reference/human_b36_both.fasta \
|
||||
* -T VariantEval \
|
||||
* --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \
|
||||
* -L 1:10,000,000-11,000,000 \
|
||||
* --outerr myVariantEval \
|
||||
* --supressDateInformation \
|
||||
* --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls
|
||||
*
|
||||
*/
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" --rodBind NA123AB,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
|
||||
" -T VariantsToVCF" +
|
||||
" -L 1:10,000,000-11,000,000" +
|
||||
" --vcfout %s",
|
||||
1, // just one output file
|
||||
md5);
|
||||
List<File> result = executeTest("testVariantsToVCFUsingGeliInput", spec).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypesToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("debeaf31846328eddc0abf226fc72ac0");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
*
|
||||
* java -jar ./dist/GenomeAnalysisTK.jar \
|
||||
* -R /broad/1KG/reference/human_b36_both.fasta \
|
||||
* -T VariantEval \
|
||||
* --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \
|
||||
* -L 1:10,000,000-11,000,000 \
|
||||
* --outerr myVariantEval \
|
||||
* --supressDateInformation \
|
||||
* --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls
|
||||
*
|
||||
*/
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" --rodBind NA123AB,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
|
||||
" -T VariantsToVCF" +
|
||||
" -L 1:10,000,000-11,000,000" +
|
||||
" --vcfout %s",
|
||||
1, // just one output file
|
||||
md5);
|
||||
List<File> result = executeTest("testVariantsToVCFUsingGeliInput", spec).getFirst();
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue