From d262cbd41cf92a4dceb5716b20e19d2b741aadaf Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 24 Sep 2009 15:16:11 +0000 Subject: [PATCH] changes to add VCF to the rod system, fix VCF output in VariantsToVCF, and some other minor changes git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1715 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/refdata/ReferenceOrderedData.java | 2 + .../sting/gatk/refdata/RodVCF.java | 16 +- .../filters/VariantFiltrationWalker.java | 7 +- .../walkers/variantstovcf/VariantsToVCF.java | 153 ++++++++---------- .../sting/utils/genotype/Genotype.java | 2 +- .../utils/genotype/vcf/VCFGenotypeRecord.java | 7 +- .../sting/utils/genotype/vcf/VCFRecord.java | 16 +- .../VariantFiltrationIntegrationTest.java | 18 +-- .../VariantsToVCFIntegrationTest.java | 79 +++++++++ 9 files changed, 192 insertions(+), 108 deletions(-) create mode 100755 java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index c6454e91b..b6edddabf 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -75,6 +75,8 @@ public class ReferenceOrderedData implements addModule("Intervals", IntervalRod.class); addModule("Variants", RodGeliText.class); addModule("GLF", RodGLF.class); + addModule("VCF", RodVCF.class); + } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index bbbc831ab..64052d097 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -256,7 +256,21 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, */ @Override public Genotype getCalledGenotype() { - throw new UnsupportedOperationException("We don't support this right now"); + double refQual = (this.getNegLog10PError()); + + if (this.mCurrentRecord != null && this.mCurrentRecord.hasGenotypeData()) { + List lst = this.mCurrentRecord.getVCFGenotypeRecords(); + if (lst.size() != 1) { + throw new IllegalStateException("VCF object does not have one and only one genotype record"); + } + double qual = 0; + if (lst.get(0).getAlleles().equals(this.getReference())) + qual = refQual; + else if (lst.get(0).getFields().containsKey("GQ")) + qual = Double.valueOf(lst.get(0).getFields().get("GQ")) / 10.0; + return new BasicGenotype(this.getLocation(), Utils.join("", lst.get(0).getAlleles()), this.getReference().charAt(0), qual); + } + return null; } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 142fdc94f..0407a604c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -315,10 +315,11 @@ public class VariantFiltrationWalker extends LocusWalker { List gt = new ArrayList(); Map map = new HashMap(); - if ( VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false) ) { + VCFRecord rec = VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false); + if ( rec != null) { if ( !filterFailureString.equals("") ) - map.put(VCFHeader.HEADER_FIELDS.FILTER, filterFailureString); - vcfWriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt)); + rec.setFilterString(filterFailureString); + vcfWriter.addRecord(rec); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 4e8b374f9..4157878af 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -3,17 +3,24 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf; import org.broadinstitute.sting.gatk.GATKArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.RodGeliText; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; -import java.io.*; +import java.io.File; +import java.io.PrintStream; import java.util.*; public class VariantsToVCF extends RefWalker { @@ -24,6 +31,7 @@ public class VariantsToVCF extends RefWalker { private VCFWriter vcfwriter = null; private VCFHeader vcfheader = null; private TreeMap sampleNames = null; + private static String format = "GT:GQ:DP"; public void initialize() { sampleNames = new TreeMap(); @@ -62,7 +70,9 @@ public class VariantsToVCF extends RefWalker { } public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) { - if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; } + if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { + return true; + } for (ReferenceOrderedDatum rod : tracker.getAllRods()) { if (rod != null && sampleNames.keySet().contains(rod.getName().toUpperCase())) { @@ -75,73 +85,73 @@ public class VariantsToVCF extends RefWalker { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { List gt = new ArrayList(); - Map map = new HashMap(); - if ( generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE) ) { - vcfwriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt)); - //vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt)); - return 1; + Map map = new HashMap(); + VCFRecord rec = generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE); + if (rec != null) { + vcfwriter.addRecord(rec); + return 1; } return 0; } - public static boolean generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, - VCFHeader vcfheader, List gt, Map map, - Map sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) { + public static VCFRecord generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, + VCFHeader vcfheader, List gt, Map map, + Map sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) { int[] alleleCounts = new int[4]; int numSNPs = 0; int numRefs = 0; - int[] alleleNames = { 0, 1, 2, 3 }; + int[] alleleNames = {0, 1, 2, 3}; double snpQual = 0.0; int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase()); - + List alts = new ArrayList(); for (String name : vcfheader.getGenotypeSamples()) { ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null); if (rod != null) { - AllelicVariant av = (AllelicVariant) rod; - String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence()); + Variation av = (Variation) rod; + String lod = String.format("%d", av.getNegLog10PError() > 99 ? 99 : (int) av.getNegLog10PError()); int depth = 0; if (rod instanceof RodGeliText) { RodGeliText rv = (RodGeliText) rod; depth = rv.depth; } - - Map str = new HashMap(); - if (av.getGenotype().get(0).charAt(0) == av.getGenotype().get(0).charAt(1)) { - str.put("key","1/1:" + lod + (depth > 0 ? ":" + depth : "")); - //str.put("key","1/1"); - } else { - str.put("key","0/1:" + lod + (depth > 0 ? ":" + depth : "")); - //str.put("key","0/1"); + if (!(rod instanceof VariantBackedByGenotype)) + throw new IllegalArgumentException("The passed in variant type must be backed by genotype data"); + Genotype genotype = ((VariantBackedByGenotype) rod).getCalledGenotype(); + List alleles = new ArrayList(); + for (char base : genotype.getBases().toCharArray()) { + alleles.add(String.valueOf(base)); + if (base != ref.getBase() && !alts.contains(String.valueOf(base))) alts.add(String.valueOf(base)); } - - List alleles = av.getGenotype(); + int allele1 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(0)); + int allele2 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(1)); + if (allele1 >= 0 && allele1 != refbase) { + alleleCounts[allele1]++; + } + if (allele2 >= 0 && allele2 != refbase) { + alleleCounts[allele2]++; + } + Map str = new HashMap(); + str.put("GQ", lod); + if (depth > 0) str.put("DP", String.valueOf(depth)); - int allele1 = BaseUtils.simpleBaseToBaseIndex(alleles.get(0).charAt(0)); - if (allele1 >= 0 && allele1 != refbase) { alleleCounts[allele1]++; } - - int allele2 = BaseUtils.simpleBaseToBaseIndex(alleles.get(0).charAt(1)); - if (allele2 >= 0 && allele2 != refbase) { alleleCounts[allele2]++; } - - gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str )); + gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str)); numSNPs++; - snpQual += av.getVariationConfidence(); + snpQual += av.getNegLog10PError(); } else { - Map str = new HashMap(); - str.put("key","0/0"); - + Map str = new HashMap(); List alleles = new ArrayList(); - alleles.add(ref.getBase() + "" + ref.getBase()); + alleles.add(String.valueOf(ref.getBase())); + alleles.add(String.valueOf(ref.getBase())); + gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str)); - gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str )); - numRefs++; } } if (numSNPs == 0) - return false; + return null; Integer[] perm = Utils.SortPermutation(alleleCounts); @@ -151,61 +161,38 @@ public class VariantsToVCF extends RefWalker { rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null); String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )", - context.getLocation(), - ref.getBase(), - BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0], - BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1], - BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2], - BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3] + context.getLocation(), + ref.getBase(), + BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0], + BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1], + BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2], + BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3] ); if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) { out.println("[multistate] " + infoString); - return false; + return null; } else { if (VERBOSE) { out.println("[locus_info] " + infoString); } } - for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) { - map.put(field,String.valueOf(1)); + Map info = new HashMap(); + if (dbsnp != null) info.put("DB","1"); + if (dbsnp != null && dbsnp.isHapmap()) info.put("H2","1"); - if (field == VCFHeader.HEADER_FIELDS.CHROM) { - map.put(field, context.getContig()); - } else if (field == VCFHeader.HEADER_FIELDS.POS) { - map.put(field, String.valueOf(context.getPosition())); - } else if (field == VCFHeader.HEADER_FIELDS.REF) { - map.put(field, String.valueOf(ref.getBases())); - } else if (field == VCFHeader.HEADER_FIELDS.ALT) { - map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3]))); - } else if (field == VCFHeader.HEADER_FIELDS.ID) { - map.put(field, (dbsnp == null) ? "." : dbsnp.name); - } else if (field == VCFHeader.HEADER_FIELDS.QUAL) { - map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual)); - } else if (field == VCFHeader.HEADER_FIELDS.FILTER) { - map.put(field, "0"); - } else if (field == VCFHeader.HEADER_FIELDS.INFO) { - String infostr = "."; - ArrayList info = new ArrayList(); + return new VCFRecord(ref.getBase(), + context.getContig(), + (int) context.getPosition(), + (dbsnp == null) ? "." : dbsnp.name, + alts, + snpQual > 99 ? 99 : (int) snpQual, + "0", + info, + format, + gt); - if (dbsnp != null) { info.add("DB=1"); } - if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); } - - for (int i = 0; i < info.size(); i++) { - if (i == 0) { infostr = ""; } - - infostr += info.get(i); - - if (i < info.size() - 1) { - infostr += ";"; - } - } - - map.put(field, infostr); - } - } - return true; } public Integer reduceInit() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index 980f59eba..cc0ad15be 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -18,7 +18,7 @@ public interface Genotype { public double getNegLog10PError(); /** - * get the bases that represent this + * get the bases that represent this genotype * * @return the bases, as a string */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index b3c032c42..e6a3399a5 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -42,9 +42,9 @@ public class VCFGenotypeRecord { */ public VCFGenotypeRecord(String sampleName, List genotypes, PHASE phasing, Map otherFlags) { this.mSampleName = sampleName; - this.mGenotypeAlleles.addAll(genotypes); + if (genotypes != null) this.mGenotypeAlleles.addAll(genotypes); this.mPhaseType = phasing; - this.mFields.putAll(otherFlags); + if (otherFlags != null) this.mFields.putAll(otherFlags); } @@ -104,8 +104,9 @@ public class VCFGenotypeRecord { } first = false; } - } + } return str; + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index e7799349d..2682e5a65 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -3,10 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broadinstitute.sting.utils.Utils; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** the basic VCF record type */ public class VCFRecord { @@ -204,9 +201,7 @@ public class VCFRecord { * @return an array of strings representing the filtering criteria, or null if none were applied */ public String[] getFilteringCodes() { - //if (this.mFilterString.equals("0")) { - // return null; - //} + if (mFilterString == null) return new String[]{"0"}; return this.mFilterString.split(";"); } @@ -220,6 +215,11 @@ public class VCFRecord { * @return a map, of the info key-value pairs */ public Map getInfoValues() { + if (this.mInfoFields.size() < 1) { + Map map = new HashMap(); + map.put(".",""); + return map; + } return this.mInfoFields; } @@ -316,7 +316,7 @@ public class VCFRecord { builder.append(getReferenceBase() + FIELD_SEPERATOR); String alts = ""; for (String str : this.getAlternateAlleles()) alts += str + ","; - builder.append(alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR); + builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR); builder.append(getQual() + FIELD_SEPERATOR); builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR); String info = ""; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 9ae04eaee..68dae0749 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -8,63 +8,63 @@ import java.util.Arrays; public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testIntervals() { - String[] md5DoC = {"eada262e03738876a2b5b94d56f0951e", "21c8e1f9dc65fdfb39347547f9b04011"}; + String[] md5DoC = {"b222d15b300f989dd2a86ff1f500f64b", "21c8e1f9dc65fdfb39347547f9b04011"}; WalkerTestSpec spec1 = new WalkerTestSpec( "-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5DoC)); executeTest("testDoCFilter", spec1); - String[] md5AlleleBalance = {"0ade6c97a46245a65558d115ef1e3956", "a13e4ce6260bf9f33ca99dc808b8e6ad"}; + String[] md5AlleleBalance = {"9a59d33b55e5bad0228f2d2d67d4c17d", "a13e4ce6260bf9f33ca99dc808b8e6ad"}; WalkerTestSpec spec2 = new WalkerTestSpec( "-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5AlleleBalance)); executeTest("testAlleleBalanceFilter", spec2); - String[] md5Strand = {"1a9568e3f7e88483e7fbc5f981a7973c", "0f7db0aad764268ee8fa3b857df8d87d"}; + String[] md5Strand = {"b0a6fb821be2f7b26f8f6d77cbd758a9", "0f7db0aad764268ee8fa3b857df8d87d"}; WalkerTestSpec spec3 = new WalkerTestSpec( "-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Strand)); executeTest("testStrandFilter", spec3); - String[] md5Lod = {"64bc742d0f44a7279ac6c4ad5c28da28", "7e0c4f2b0fda85fd2891eee76c396a55"}; + String[] md5Lod = {"60624843c4c8ae561acc444df565da99", "7e0c4f2b0fda85fd2891eee76c396a55"}; WalkerTestSpec spec4 = new WalkerTestSpec( "-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Lod)); executeTest("testLodFilter", spec4); - String[] md5MQ0 = {"1201176efca694e2566a9de1508517b8", "3203de335621851bccf596242b079e23"}; + String[] md5MQ0 = {"5e3d4d6b13e79a5df5171d3e5a9f1bd7", "3203de335621851bccf596242b079e23"}; WalkerTestSpec spec5 = new WalkerTestSpec( "-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5MQ0)); executeTest("testMappingQuality0Filter", spec5); - String[] md5MQ = {"ed922e58c856e1bc3f125b635dea48fc", "ecc777feedea61f7b570d114c2ab89b1"}; + String[] md5MQ = {"fdbac9cf332dd45d9c92146157ace65f", "ecc777feedea61f7b570d114c2ab89b1"}; WalkerTestSpec spec6 = new WalkerTestSpec( "-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5MQ)); executeTest("testRMSMappingQualityFilter", spec6); - String[] md5OnOff = {"8a00c5ad80c43e468186ce0147553048", "67f2e1bc025833b0fa31f47195198997"}; + String[] md5OnOff = {"57c5a92bde03adbff9c6ca6eada033c4", "67f2e1bc025833b0fa31f47195198997"}; WalkerTestSpec spec7 = new WalkerTestSpec( "-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5OnOff)); executeTest("testOnOffGenotypeFilter", spec7); - String[] md5Clusters = {"ee9d8039490354cc7f2c574b5d10c7d8", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"}; + String[] md5Clusters = {"44223fa50dac2d9c1096558689cb8493", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"}; WalkerTestSpec spec8 = new WalkerTestSpec( "-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Clusters)); executeTest("testClusteredSnpsFilter", spec8); - String[] md5Indels = {"90eb46f8d2bf4c0a72716e35c914839d", "8e0e915a1cb63d7049e0671ed00101fe"}; + String[] md5Indels = {"0f03727ac9e6fc43311377b29d12596c", "8e0e915a1cb63d7049e0671ed00101fe"}; WalkerTestSpec spec9 = new WalkerTestSpec( "-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java new file mode 100755 index 000000000..425ff9c45 --- /dev/null +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java @@ -0,0 +1,79 @@ +package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.List; +import java.util.ArrayList; +import java.io.File; + + +/** + * @author aaron + *

+ * Class VariantsToVCFIntegrationTest + *

+ * test(s) for the VariantsToVCF walker. + */ +public class VariantsToVCFIntegrationTest extends WalkerTest { + + + @Test + public void testVariantsToVCFUsingGeliInput() { + List md5 = new ArrayList(); + md5.add("d1882fd8ecee6a95f561ed3be4d4a435"); + + /** + * the above MD5 was calculated from running the following command: + * + * java -jar ./dist/GenomeAnalysisTK.jar \ + * -R /broad/1KG/reference/human_b36_both.fasta \ + * -T VariantEval \ + * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ + * -L 1:10,000,000-11,000,000 \ + * --outerr myVariantEval \ + * --supressDateInformation \ + * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls + * + */ + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R /broad/1KG/reference/human_b36_both.fasta" + + " --rodBind NA123AB,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + + " -T VariantsToVCF" + + " -L 1:10,000,000-11,000,000" + + " --vcfout %s", + 1, // just one output file + md5); + List result = executeTest("testVariantsToVCFUsingGeliInput", spec).getFirst(); + } + + @Test + public void testGenotypesToVCFUsingGeliInput() { + List md5 = new ArrayList(); + md5.add("debeaf31846328eddc0abf226fc72ac0"); + + /** + * the above MD5 was calculated from running the following command: + * + * java -jar ./dist/GenomeAnalysisTK.jar \ + * -R /broad/1KG/reference/human_b36_both.fasta \ + * -T VariantEval \ + * --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod \ + * -L 1:10,000,000-11,000,000 \ + * --outerr myVariantEval \ + * --supressDateInformation \ + * --rodBind eval,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls + * + */ + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R /broad/1KG/reference/human_b36_both.fasta" + + " --rodBind NA123AB,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + + " -T VariantsToVCF" + + " -L 1:10,000,000-11,000,000" + + " --vcfout %s", + 1, // just one output file + md5); + List result = executeTest("testVariantsToVCFUsingGeliInput", spec).getFirst(); + } + +}