changes to add VCF to the rod system, fix VCF output in VariantsToVCF, and some other minor changes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1715 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-24 15:16:11 +00:00
parent 8349004414
commit d262cbd41c
9 changed files with 192 additions and 108 deletions

View File

@ -75,6 +75,8 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
addModule("Intervals", IntervalRod.class);
addModule("Variants", RodGeliText.class);
addModule("GLF", RodGLF.class);
addModule("VCF", RodVCF.class);
}

View File

@ -256,7 +256,21 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
*/
@Override
public Genotype getCalledGenotype() {
throw new UnsupportedOperationException("We don't support this right now");
double refQual = (this.getNegLog10PError());
if (this.mCurrentRecord != null && this.mCurrentRecord.hasGenotypeData()) {
List<VCFGenotypeRecord> lst = this.mCurrentRecord.getVCFGenotypeRecords();
if (lst.size() != 1) {
throw new IllegalStateException("VCF object does not have one and only one genotype record");
}
double qual = 0;
if (lst.get(0).getAlleles().equals(this.getReference()))
qual = refQual;
else if (lst.get(0).getFields().containsKey("GQ"))
qual = Double.valueOf(lst.get(0).getFields().get("GQ")) / 10.0;
return new BasicGenotype(this.getLocation(), Utils.join("", lst.get(0).getAlleles()), this.getReference().charAt(0), qual);
}
return null;
}
/**

View File

@ -315,10 +315,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
if ( VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false) ) {
VCFRecord rec = VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false);
if ( rec != null) {
if ( !filterFailureString.equals("") )
map.put(VCFHeader.HEADER_FIELDS.FILTER, filterFailureString);
vcfWriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt));
rec.setFilterString(filterFailureString);
vcfWriter.addRecord(rec);
}
}

View File

@ -3,17 +3,24 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RodGeliText;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.*;
import java.io.File;
import java.io.PrintStream;
import java.util.*;
public class VariantsToVCF extends RefWalker<Integer, Integer> {
@ -24,6 +31,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
private VCFWriter vcfwriter = null;
private VCFHeader vcfheader = null;
private TreeMap<String, String> sampleNames = null;
private static String format = "GT:GQ:DP";
public void initialize() {
sampleNames = new TreeMap<String, String>();
@ -62,7 +70,9 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
}
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; }
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) {
return true;
}
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
if (rod != null && sampleNames.keySet().contains(rod.getName().toUpperCase())) {
@ -75,73 +85,73 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
if ( generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE) ) {
vcfwriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt));
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
return 1;
Map<VCFHeader.HEADER_FIELDS, String> map = new HashMap<VCFHeader.HEADER_FIELDS, String>();
VCFRecord rec = generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE);
if (rec != null) {
vcfwriter.addRecord(rec);
return 1;
}
return 0;
}
public static boolean generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context,
VCFHeader vcfheader, List<VCFGenotypeRecord> gt, Map<VCFHeader.HEADER_FIELDS,String> map,
Map<String, String> sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) {
public static VCFRecord generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context,
VCFHeader vcfheader, List<VCFGenotypeRecord> gt, Map<VCFHeader.HEADER_FIELDS, String> map,
Map<String, String> sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) {
int[] alleleCounts = new int[4];
int numSNPs = 0;
int numRefs = 0;
int[] alleleNames = { 0, 1, 2, 3 };
int[] alleleNames = {0, 1, 2, 3};
double snpQual = 0.0;
int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
List<String> alts = new ArrayList<String>();
for (String name : vcfheader.getGenotypeSamples()) {
ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null);
if (rod != null) {
AllelicVariant av = (AllelicVariant) rod;
String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence());
Variation av = (Variation) rod;
String lod = String.format("%d", av.getNegLog10PError() > 99 ? 99 : (int) av.getNegLog10PError());
int depth = 0;
if (rod instanceof RodGeliText) {
RodGeliText rv = (RodGeliText) rod;
depth = rv.depth;
}
Map<String,String> str = new HashMap<String,String>();
if (av.getGenotype().get(0).charAt(0) == av.getGenotype().get(0).charAt(1)) {
str.put("key","1/1:" + lod + (depth > 0 ? ":" + depth : ""));
//str.put("key","1/1");
} else {
str.put("key","0/1:" + lod + (depth > 0 ? ":" + depth : ""));
//str.put("key","0/1");
if (!(rod instanceof VariantBackedByGenotype))
throw new IllegalArgumentException("The passed in variant type must be backed by genotype data");
Genotype genotype = ((VariantBackedByGenotype) rod).getCalledGenotype();
List<String> alleles = new ArrayList<String>();
for (char base : genotype.getBases().toCharArray()) {
alleles.add(String.valueOf(base));
if (base != ref.getBase() && !alts.contains(String.valueOf(base))) alts.add(String.valueOf(base));
}
List<String> alleles = av.getGenotype();
int allele1 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(0));
int allele2 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(1));
if (allele1 >= 0 && allele1 != refbase) {
alleleCounts[allele1]++;
}
if (allele2 >= 0 && allele2 != refbase) {
alleleCounts[allele2]++;
}
Map<String, String> str = new HashMap<String, String>();
str.put("GQ", lod);
if (depth > 0) str.put("DP", String.valueOf(depth));
int allele1 = BaseUtils.simpleBaseToBaseIndex(alleles.get(0).charAt(0));
if (allele1 >= 0 && allele1 != refbase) { alleleCounts[allele1]++; }
int allele2 = BaseUtils.simpleBaseToBaseIndex(alleles.get(0).charAt(1));
if (allele2 >= 0 && allele2 != refbase) { alleleCounts[allele2]++; }
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str ));
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str));
numSNPs++;
snpQual += av.getVariationConfidence();
snpQual += av.getNegLog10PError();
} else {
Map<String,String> str = new HashMap<String,String>();
str.put("key","0/0");
Map<String, String> str = new HashMap<String, String>();
List<String> alleles = new ArrayList<String>();
alleles.add(ref.getBase() + "" + ref.getBase());
alleles.add(String.valueOf(ref.getBase()));
alleles.add(String.valueOf(ref.getBase()));
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str));
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str ));
numRefs++;
}
}
if (numSNPs == 0)
return false;
return null;
Integer[] perm = Utils.SortPermutation(alleleCounts);
@ -151,61 +161,38 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )",
context.getLocation(),
ref.getBase(),
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
context.getLocation(),
ref.getBase(),
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
);
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) {
out.println("[multistate] " + infoString);
return false;
return null;
} else {
if (VERBOSE) {
out.println("[locus_info] " + infoString);
}
}
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
map.put(field,String.valueOf(1));
Map<String,String> info = new HashMap<String,String>();
if (dbsnp != null) info.put("DB","1");
if (dbsnp != null && dbsnp.isHapmap()) info.put("H2","1");
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
map.put(field, context.getContig());
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
map.put(field, String.valueOf(context.getPosition()));
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
map.put(field, String.valueOf(ref.getBases()));
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
map.put(field, "0");
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
String infostr = ".";
ArrayList<String> info = new ArrayList<String>();
return new VCFRecord(ref.getBase(),
context.getContig(),
(int) context.getPosition(),
(dbsnp == null) ? "." : dbsnp.name,
alts,
snpQual > 99 ? 99 : (int) snpQual,
"0",
info,
format,
gt);
if (dbsnp != null) { info.add("DB=1"); }
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
for (int i = 0; i < info.size(); i++) {
if (i == 0) { infostr = ""; }
infostr += info.get(i);
if (i < info.size() - 1) {
infostr += ";";
}
}
map.put(field, infostr);
}
}
return true;
}
public Integer reduceInit() {

View File

@ -18,7 +18,7 @@ public interface Genotype {
public double getNegLog10PError();
/**
* get the bases that represent this
* get the bases that represent this genotype
*
* @return the bases, as a string
*/

View File

@ -42,9 +42,9 @@ public class VCFGenotypeRecord {
*/
public VCFGenotypeRecord(String sampleName, List<String> genotypes, PHASE phasing, Map<String, String> otherFlags) {
this.mSampleName = sampleName;
this.mGenotypeAlleles.addAll(genotypes);
if (genotypes != null) this.mGenotypeAlleles.addAll(genotypes);
this.mPhaseType = phasing;
this.mFields.putAll(otherFlags);
if (otherFlags != null) this.mFields.putAll(otherFlags);
}
@ -104,8 +104,9 @@ public class VCFGenotypeRecord {
}
first = false;
}
}
}
return str;
}

View File

@ -3,10 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.Utils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/** the basic VCF record type */
public class VCFRecord {
@ -204,9 +201,7 @@ public class VCFRecord {
* @return an array of strings representing the filtering criteria, or null if none were applied
*/
public String[] getFilteringCodes() {
//if (this.mFilterString.equals("0")) {
// return null;
//}
if (mFilterString == null) return new String[]{"0"};
return this.mFilterString.split(";");
}
@ -220,6 +215,11 @@ public class VCFRecord {
* @return a map, of the info key-value pairs
*/
public Map<String, String> getInfoValues() {
if (this.mInfoFields.size() < 1) {
Map<String,String> map = new HashMap<String, String>();
map.put(".","");
return map;
}
return this.mInfoFields;
}
@ -316,7 +316,7 @@ public class VCFRecord {
builder.append(getReferenceBase() + FIELD_SEPERATOR);
String alts = "";
for (String str : this.getAlternateAlleles()) alts += str + ",";
builder.append(alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR);
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
builder.append(getQual() + FIELD_SEPERATOR);
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
String info = "";

View File

@ -8,63 +8,63 @@ import java.util.Arrays;
public class VariantFiltrationIntegrationTest extends WalkerTest {
@Test
public void testIntervals() {
String[] md5DoC = {"eada262e03738876a2b5b94d56f0951e", "21c8e1f9dc65fdfb39347547f9b04011"};
String[] md5DoC = {"b222d15b300f989dd2a86ff1f500f64b", "21c8e1f9dc65fdfb39347547f9b04011"};
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5DoC));
executeTest("testDoCFilter", spec1);
String[] md5AlleleBalance = {"0ade6c97a46245a65558d115ef1e3956", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
String[] md5AlleleBalance = {"9a59d33b55e5bad0228f2d2d67d4c17d", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5AlleleBalance));
executeTest("testAlleleBalanceFilter", spec2);
String[] md5Strand = {"1a9568e3f7e88483e7fbc5f981a7973c", "0f7db0aad764268ee8fa3b857df8d87d"};
String[] md5Strand = {"b0a6fb821be2f7b26f8f6d77cbd758a9", "0f7db0aad764268ee8fa3b857df8d87d"};
WalkerTestSpec spec3 = new WalkerTestSpec(
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5Strand));
executeTest("testStrandFilter", spec3);
String[] md5Lod = {"64bc742d0f44a7279ac6c4ad5c28da28", "7e0c4f2b0fda85fd2891eee76c396a55"};
String[] md5Lod = {"60624843c4c8ae561acc444df565da99", "7e0c4f2b0fda85fd2891eee76c396a55"};
WalkerTestSpec spec4 = new WalkerTestSpec(
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5Lod));
executeTest("testLodFilter", spec4);
String[] md5MQ0 = {"1201176efca694e2566a9de1508517b8", "3203de335621851bccf596242b079e23"};
String[] md5MQ0 = {"5e3d4d6b13e79a5df5171d3e5a9f1bd7", "3203de335621851bccf596242b079e23"};
WalkerTestSpec spec5 = new WalkerTestSpec(
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5MQ0));
executeTest("testMappingQuality0Filter", spec5);
String[] md5MQ = {"ed922e58c856e1bc3f125b635dea48fc", "ecc777feedea61f7b570d114c2ab89b1"};
String[] md5MQ = {"fdbac9cf332dd45d9c92146157ace65f", "ecc777feedea61f7b570d114c2ab89b1"};
WalkerTestSpec spec6 = new WalkerTestSpec(
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5MQ));
executeTest("testRMSMappingQualityFilter", spec6);
String[] md5OnOff = {"8a00c5ad80c43e468186ce0147553048", "67f2e1bc025833b0fa31f47195198997"};
String[] md5OnOff = {"57c5a92bde03adbff9c6ca6eada033c4", "67f2e1bc025833b0fa31f47195198997"};
WalkerTestSpec spec7 = new WalkerTestSpec(
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5OnOff));
executeTest("testOnOffGenotypeFilter", spec7);
String[] md5Clusters = {"ee9d8039490354cc7f2c574b5d10c7d8", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
String[] md5Clusters = {"44223fa50dac2d9c1096558689cb8493", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
WalkerTestSpec spec8 = new WalkerTestSpec(
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5Clusters));
executeTest("testClusteredSnpsFilter", spec8);
String[] md5Indels = {"90eb46f8d2bf4c0a72716e35c914839d", "8e0e915a1cb63d7049e0671ed00101fe"};
String[] md5Indels = {"0f03727ac9e6fc43311377b29d12596c", "8e0e915a1cb63d7049e0671ed00101fe"};
WalkerTestSpec spec9 = new WalkerTestSpec(
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,

View File

@ -0,0 +1,79 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.List;
import java.util.ArrayList;
import java.io.File;
/**
* @author aaron
* <p/>
* Class VariantsToVCFIntegrationTest
* <p/>
* test(s) for the VariantsToVCF walker.
*/
public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("d1882fd8ecee6a95f561ed3be4d4a435");
/**
* the above MD5 was calculated from running the following command:
*
* java -jar ./dist/GenomeAnalysisTK.jar \
* -R /broad/1KG/reference/human_b36_both.fasta \
* -T VariantEval \
* --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \
* -L 1:10,000,000-11,000,000 \
* --outerr myVariantEval \
* --supressDateInformation \
* --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls
*
*/
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R /broad/1KG/reference/human_b36_both.fasta" +
" --rodBind NA123AB,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" +
" -T VariantsToVCF" +
" -L 1:10,000,000-11,000,000" +
" --vcfout %s",
1, // just one output file
md5);
List<File> result = executeTest("testVariantsToVCFUsingGeliInput", spec).getFirst();
}
@Test
public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("debeaf31846328eddc0abf226fc72ac0");
/**
* the above MD5 was calculated from running the following command:
*
* java -jar ./dist/GenomeAnalysisTK.jar \
* -R /broad/1KG/reference/human_b36_both.fasta \
* -T VariantEval \
* --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \
* -L 1:10,000,000-11,000,000 \
* --outerr myVariantEval \
* --supressDateInformation \
* --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls
*
*/
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R /broad/1KG/reference/human_b36_both.fasta" +
" --rodBind NA123AB,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" +
" -T VariantsToVCF" +
" -L 1:10,000,000-11,000,000" +
" --vcfout %s",
1, // just one output file
md5);
List<File> result = executeTest("testVariantsToVCFUsingGeliInput", spec).getFirst();
}
}