diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index df73db2fe..3b6d1d76c 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -59,7 +59,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, @Override public String toString() { if (this.mCurrentRecord != null) - return this.mCurrentRecord.toString(); + return this.mCurrentRecord.toStringRepresentation(mReader.getHeader()); else return ""; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 98b89a0ec..0a9025bd4 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.*; import java.io.File; @@ -119,51 +117,35 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { lazyInitialize(genotypes, mFile, mStream); - VCFParamters params = new VCFParamters(); + VCFParameters params = new VCFParameters(); params.addFormatItem("GT"); for (Genotype gtype : genotypes) { // setup the parameters params.setLocations(gtype.getLocation(), gtype.getReference()); - Map map = new HashMap(); - if (!(gtype instanceof SampleBacked)) { - throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface"); - } - - // calculate the RMS mapping qualities and the read depth - if (gtype instanceof ReadBacked) { - int readDepth = ((ReadBacked) gtype).getReadCount(); - map.put("RD", String.valueOf(readDepth)); - 
params.addFormatItem("RD"); - } - double qual = gtype.getNegLog10PError(); - map.put("GQ", String.format("%.2f", qual)); - params.addFormatItem("GQ"); - - List alleles = new ArrayList(); - for (char allele : gtype.getBases().toCharArray()) { - alleles.add(String.valueOf(allele)); - params.addAlternateBase(allele); - } - - // TODO -- use the GenotypeMetaData object if it's not null - - VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(), - alleles, - VCFGenotypeRecord.PHASE.UNPHASED, - map); + VCFGenotypeRecord record = createVCFGenotypeRecord(params, gtype); params.addGenotypeRecord(record); } - Map infoFields = new HashMap(); + Map infoFields = getInfoFields(metadata, params); + + double qual = (metadata == null) ? 0 : (metadata.getLOD()) * 10; + + /** + * TODO: Eric fix the next line when our LOD scores are 0->Inf based instead + * of -3 to Inf based. + */ + if (qual < 0.0) { + qual = 0.0; + } VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), params.getContig(), params.getPosition(), ".", params.getAlternateBases(), - 0, /* BETTER VALUE HERE */ + qual, ".", infoFields, params.getFormatString(), @@ -172,85 +154,79 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { mWriter.addRecord(vcfRecord); } + /** + * get the information fields of the VCF record, given the meta data and parameters + * + * @param metadata the metadata associated with this multi sample call + * @param params the parameters + * + * @return a mapping of info field to value + */ + private Map getInfoFields(GenotypeMetaData metadata, VCFParameters params) { + Map infoFields = new HashMap(); + if (metadata != null) { + infoFields.put("SB", String.format("%.2f", metadata.getSLOD())); + infoFields.put("AF", String.format("%.2f", metadata.getAlleleFrequency())); + } + infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size())); + return infoFields; + } + + /** + * create the VCF genotype record + * + * @param params the VCF 
parameters object + * @param gtype the genotype + * + * @return a VCFGenotypeRecord + */ + private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, Genotype gtype) { + Map map = new HashMap(); + if (!(gtype instanceof SampleBacked)) { + throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface"); + } + + // record the read depth (RD) when the genotype is backed by reads + if (gtype instanceof ReadBacked) { + int readDepth = ((ReadBacked) gtype).getReadCount(); + map.put("RD", String.valueOf(readDepth)); + params.addFormatItem("RD"); + } + double qual = gtype.getNegLog10PError(); + map.put("GQ", String.format("%.2f", qual)); + params.addFormatItem("GQ"); + + List alleles = createAlleleArray(gtype); + for (String allele : alleles) { + params.addAlternateBase(allele); + } + + VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(), + alleles, + VCFGenotypeRecord.PHASE.UNPHASED, + map); + return record; + } + + /** + * create the list of alleles, one single-character string per base of the genotype + * + * @param gtype the genotype object + * + * @return a list of strings, one for each character of gtype.getBases() + */ + private List createAlleleArray(Genotype gtype) { + List alleles = new ArrayList(); + for (char allele : gtype.getBases().toCharArray()) { + alleles.add(String.valueOf(allele)); + } + return alleles; + } + /** @return true if we support multisample, false otherwise */ @Override public boolean supportsMultiSample() { return true; } - - /** - * a helper class, which performs a lot of the safety checks on the parameters - * we feed to the VCF (like ensuring the same position for each genotype in a call). 
- */ - class VCFParamters { - private char referenceBase = '0'; - private int position = 0; - private String contig = null; - private boolean initialized = false; - private List genotypesRecord = new ArrayList(); - private List formatList = new ArrayList(); - private List alternateBases = new ArrayList(); - - public void setLocations(GenomeLoc location, char refBase) { - // if we haven't set it up, we initialize the object - if (!initialized) { - initialized = true; - this.contig = location.getContig(); - this.position = (int)location.getStart(); - if (location.getStart() != location.getStop()) { - throw new IllegalArgumentException("The start and stop locations must be the same"); - } - this.referenceBase = refBase; - } else { - if (!contig.equals(this.contig)) - throw new IllegalArgumentException("The contig name has to be the same at a single locus"); - if (position != this.position) - throw new IllegalArgumentException("The position has to be the same at a single locus"); - if (refBase != this.referenceBase) - throw new IllegalArgumentException("The reference base name has to be the same at a single locus"); - } - } - - /** @return get the position */ - public int getPosition() { - return position; - } - - /** @return get the contig name */ - public String getContig() { - return contig; - } - - /** @return get the reference base */ - public char getReferenceBase() { - return referenceBase; - } - - public void addGenotypeRecord(VCFGenotypeRecord record) { - this.genotypesRecord.add(record); - } - - public void addFormatItem(String item) { - if (!formatList.contains(item)) - formatList.add(item); - } - - public void addAlternateBase(char base) { - if (!alternateBases.contains(String.valueOf(base)) && base != this.getReferenceBase()) - alternateBases.add(String.valueOf(base)); - } - - public List getAlternateBases() { - return alternateBases; - } - - public String getFormatString() { - return Utils.join(";", formatList); - } - - public List getGenotypesRecords() { 
- return genotypesRecord; - } - } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java new file mode 100644 index 000000000..01832b5e2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java @@ -0,0 +1,105 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; + +import java.util.List; +import java.util.ArrayList; + + +/** + * a helper class, which performs a lot of the safety checks on the parameters + * we feed to the VCF (like ensuring the same position for each genotype in a call). + */ +class VCFParameters { + private char referenceBase = '0'; + private int position = 0; + private String contig = null; + private boolean initialized = false; + private List genotypesRecord = new ArrayList(); + private List formatList = new ArrayList(); + private List alternateBases = new ArrayList(); + + /** + * record the locus of the first genotype, and check every later genotype against it + * + * @param location the location of the genotype + * @param refBase the reference base at that location + * + * @throws IllegalArgumentException if the first location's start and stop differ, or if a later + * call's contig, start position or reference base differs from the recorded one + */ + public void setLocations(GenomeLoc location, char refBase) { + // if we haven't set it up, we initialize the object + if (!initialized) { + // validate before storing anything, so a rejected location leaves the object uninitialized + if (location.getStart() != location.getStop()) { + throw new IllegalArgumentException("The start and stop locations must be the same"); + } + initialized = true; + this.contig = location.getContig(); + this.position = (int) location.getStart(); + this.referenceBase = refBase; + } else { + // compare the incoming location with the recorded one; the fields used to be compared with themselves + if (!location.getContig().equals(this.contig)) + throw new IllegalArgumentException("The contig name has to be the same at a single locus"); + if ((int) location.getStart() != this.position) + throw new IllegalArgumentException("The position has to be the same at a single locus"); + if (refBase != this.referenceBase) + throw new IllegalArgumentException("The reference base name has to be the same at a single locus"); + } + } + + /** @return get the position */ + public int getPosition() { + return position; + } + + /** @return get the contig name */ 
+ public String getContig() { + return contig; + } + + /** @return get the reference base */ + public char getReferenceBase() { + return referenceBase; + } + + /** add a genotype record to the list kept for this locus */ + public void addGenotypeRecord(VCFGenotypeRecord record) { + this.genotypesRecord.add(record); + } + + /** add a format key, unless the format list already contains it */ + public void addFormatItem(String item) { + if (!formatList.contains(item)) + formatList.add(item); + } + + /** + * add an alternate base, unless it is already listed or it equals the reference base + * + * @param base the base to add, as a string + */ + public void addAlternateBase(String base) { + // compare the strings by value; != compares object references, which differ even when the characters match + if (!alternateBases.contains(String.valueOf(base)) && !String.valueOf(this.getReferenceBase()).equals(base)) + alternateBases.add(base); + } + + /** @return the alternate bases, in the order they were added */ + public List getAlternateBases() { + return alternateBases; + } + + /** @return the format items passed through Utils.join with ";" (NOTE(review): VCF FORMAT keys are usually ':' separated; confirm VCFRecord expects ';') */ + public String getFormatString() { + return Utils.join(";", formatList); + } + + /** @return the genotype records, in the order they were added */ + public List getGenotypesRecords() { + return genotypesRecord; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index f2b1d65f7..0ed8c1ca1 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -19,7 +19,7 @@ public class VCFRecord { // the alternate bases private final List mAlts = new ArrayList(); // our qual value - private int mQual; + private double mQual; // our filter string private String mFilterString; // our info fields @@ -61,7 +61,7 @@ public class VCFRecord { int position, String ID, List altBases, - int qual, + double qual, String filters, Map infoFields, String genotypeFormatString, @@ -117,7 +117,7 @@ public class VCFRecord { addAlternateBase(alt); break; case QUAL: - this.setQual(Integer.valueOf(columnValues.get(val))); + this.setQual(Double.valueOf(columnValues.get(val))); break; case FILTER: this.setFilterString(columnValues.get(val)); @@ -191,7 +191,7 @@ public class VCFRecord { } /** @return the phred-scaled quality score */ - public int getQual() { + public double getQual() { return this.mQual; } @@ -273,7 +273,7 @@ public class VCFRecord { this.mID 
= mID; } - public void setQual(int mQual) { + public void setQual(double mQual) { if (mQual < 0) throw new IllegalArgumentException("Qual values must be greater than 0"); this.mQual = mQual; @@ -307,12 +307,12 @@ public class VCFRecord { /** * the generation of a string representation, which is used by the VCF writer + * @param header the header whose sample names determine the genotype columns that are written * @return a string */ - public String toString() { + public String toStringRepresentation(VCFHeader header) { StringBuilder builder = new StringBuilder(); - // else builder.append(FIELD_SEPERATOR + record.getValue(field)); // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO builder.append(getChromosome() + FIELD_SEPERATOR); builder.append(getPosition() + FIELD_SEPERATOR); @@ -321,7 +321,7 @@ public class VCFRecord { String alts = ""; for (String str : this.getAlternateAlleles()) alts += str + ","; builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR); - builder.append(getQual() + FIELD_SEPERATOR); + builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR); builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR); String info = ""; for (String str : this.getInfoValues().keySet()) { @@ -335,9 +335,30 @@ public class VCFRecord { else builder.append(info); if (this.hasGenotypeData()) { - builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString()); - for (VCFGenotypeRecord rec : this.getVCFGenotypeRecords()) { - builder.append(FIELD_SEPERATOR); + addGenotypeData(builder, header); + } + return builder.toString(); + } + + /** + * append the format string, then one genotype column for each sample named in the header + * + * @param builder the string builder + * @param header the header object, which supplies the sample names + */ + private void addGenotypeData(StringBuilder builder, VCFHeader header) { + builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString()); + if (header.getGenotypeSamples().size() < getVCFGenotypeRecords().size()) + throw new RuntimeException("We have more genotype samples than the header specified"); + + Map gMap = 
genotypeListToMap(getVCFGenotypeRecords()); + + for (String genotype : header.getGenotypeSamples()) { + + builder.append(FIELD_SEPERATOR); + + if (gMap.containsKey(genotype)) { + VCFGenotypeRecord rec = gMap.get(genotype); if (!rec.toGenotypeString(this.mAlts).equals("")) builder.append(rec.toGenotypeString(this.mAlts)); for (String s : rec.getFields().keySet()) { @@ -345,9 +366,14 @@ public class VCFRecord { builder.append(":"); builder.append(rec.getFields().get(s)); } + gMap.remove(genotype); + } else { + builder.append("."); } } - return builder.toString(); + if (gMap.size() != 0) { + throw new RuntimeException("We failed to use all the genotype samples; their must be an incosistancy between the header and records"); + } } /** @@ -370,4 +396,19 @@ public class VCFRecord { return true; } + /** + * create a genotype mapping from a list and their sample names + * + * @param list a list of genotype samples + * + * @return a mapping of the sample name to VCF genotype record + */ + private static Map genotypeListToMap(List list) { + Map map = new HashMap(); + for (VCFGenotypeRecord rec : list) { + map.put(rec.getSampleName(), rec); + } + return map; + } + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index a1467ccfa..3a177a272 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -1,10 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.broadinstitute.sting.utils.StingException; - import java.io.*; -import java.nio.charset.Charset; /** * this class writers VCF files @@ -75,11 +72,7 @@ public class VCFWriter { * @param record the record to output */ public void addRecord(VCFRecord record) { - if (record.getColumnCount() != mHeader.getGenotypeSamples().size() + mHeader.getHeaderFields().size()) { - throw new RuntimeException("Record has " + 
record.getColumnCount() + - " columns, when is should have " + mHeader.getColumnCount()); - } - String vcfString = record.toString(); + String vcfString = record.toStringRepresentation(mHeader); try { mWriter.write(vcfString + "\n"); } catch (IOException e) { diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java index feb6a1943..9a90fe51e 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java @@ -33,7 +33,7 @@ public class RodVCFTest extends BaseTest { private static IndexedFastaSequenceFile seq; private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf"); - + private VCFHeader mHeader; @BeforeClass public static void beforeTests() { try { @@ -47,13 +47,13 @@ public class RodVCFTest extends BaseTest { private RodVCF getVCFObject() { RodVCF vcf = new RodVCF("VCF"); - VCFHeader header = null; + mHeader = null; try { - header = (VCFHeader) vcf.initialize(vcfFile); + mHeader = (VCFHeader) vcf.initialize(vcfFile); } catch (FileNotFoundException e) { fail("Unable to open VCF file"); } - header.checkVCFVersion(); + mHeader.checkVCFVersion(); return vcf; } @@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest { @Test public void testToString() { // slightly altered line, due to map ordering - String firstLine = "20\t14370\trs6054257\tG\tA\t29\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n"; + String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n"; RodVCF vcf = getVCFObject(); VCFReader reader = new VCFReader(vcfFile); Iterator iter = vcf.createIterator("VCF", vcfFile); @@ -99,13 +99,13 @@ public class RodVCFTest extends BaseTest { while (iter.hasNext()) { VCFRecord rec1 = reader.next(); VCFRecord rec2 = iter.next().mCurrentRecord; - 
if (!rec1.toString().equals(rec2.toString())) { + if (!rec1.toStringRepresentation(mHeader).equals(rec2.toStringRepresentation(mHeader))) { fail("VCF record rec1.toString() != rec2.toString()"); } // verify the first line too if (first) { - if (!firstLine.equals(rec1.toString() + "\n")) { - fail("VCF record rec1.toString() != expected string :\n" + rec1.toString() + firstLine); + if (!firstLine.equals(rec1.toStringRepresentation(mHeader) + "\n")) { + fail("VCF record rec1.toString() != expected string :\n" + rec1.toStringRepresentation(mHeader) + firstLine); } first = false; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 68dae0749..a36041601 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -8,63 +8,63 @@ import java.util.Arrays; public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testIntervals() { - String[] md5DoC = {"b222d15b300f989dd2a86ff1f500f64b", "21c8e1f9dc65fdfb39347547f9b04011"}; + String[] md5DoC = {"c0a7e2fc07d565e633b3064f9f3cdaf5", "21c8e1f9dc65fdfb39347547f9b04011"}; WalkerTestSpec spec1 = new WalkerTestSpec( "-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5DoC)); executeTest("testDoCFilter", spec1); - String[] md5AlleleBalance = {"9a59d33b55e5bad0228f2d2d67d4c17d", "a13e4ce6260bf9f33ca99dc808b8e6ad"}; + String[] md5AlleleBalance = 
{"aa0f7800cfd346236620ae0eac220817", "a13e4ce6260bf9f33ca99dc808b8e6ad"}; WalkerTestSpec spec2 = new WalkerTestSpec( "-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5AlleleBalance)); executeTest("testAlleleBalanceFilter", spec2); - String[] md5Strand = {"b0a6fb821be2f7b26f8f6d77cbd758a9", "0f7db0aad764268ee8fa3b857df8d87d"}; + String[] md5Strand = {"9f430f251dbeb58a2f80a1306a5dd492", "0f7db0aad764268ee8fa3b857df8d87d"}; WalkerTestSpec spec3 = new WalkerTestSpec( "-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Strand)); executeTest("testStrandFilter", spec3); - String[] md5Lod = {"60624843c4c8ae561acc444df565da99", "7e0c4f2b0fda85fd2891eee76c396a55"}; + String[] md5Lod = {"56177258c0b3944c043f86faee4b42ae", "7e0c4f2b0fda85fd2891eee76c396a55"}; WalkerTestSpec spec4 = new WalkerTestSpec( "-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Lod)); executeTest("testLodFilter", spec4); - 
String[] md5MQ0 = {"5e3d4d6b13e79a5df5171d3e5a9f1bd7", "3203de335621851bccf596242b079e23"}; + String[] md5MQ0 = {"0e303c32f5c1503f4c875771f28fc46c", "3203de335621851bccf596242b079e23"}; WalkerTestSpec spec5 = new WalkerTestSpec( "-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5MQ0)); executeTest("testMappingQuality0Filter", spec5); - String[] md5MQ = {"fdbac9cf332dd45d9c92146157ace65f", "ecc777feedea61f7b570d114c2ab89b1"}; + String[] md5MQ = {"946462a6199e9453784e0942e18e6830", "ecc777feedea61f7b570d114c2ab89b1"}; WalkerTestSpec spec6 = new WalkerTestSpec( "-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5MQ)); executeTest("testRMSMappingQualityFilter", spec6); - String[] md5OnOff = {"57c5a92bde03adbff9c6ca6eada033c4", "67f2e1bc025833b0fa31f47195198997"}; + String[] md5OnOff = {"2ff84e104ce73e347e55d272170b4d03", "67f2e1bc025833b0fa31f47195198997"}; WalkerTestSpec spec7 = new WalkerTestSpec( "-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod 
-vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5OnOff)); executeTest("testOnOffGenotypeFilter", spec7); - String[] md5Clusters = {"44223fa50dac2d9c1096558689cb8493", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"}; + String[] md5Clusters = {"e6a1c088678b1c31ff340ebd622b476e", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"}; WalkerTestSpec spec8 = new WalkerTestSpec( "-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, Arrays.asList(md5Clusters)); executeTest("testClusteredSnpsFilter", spec8); - String[] md5Indels = {"0f03727ac9e6fc43311377b29d12596c", "8e0e915a1cb63d7049e0671ed00101fe"}; + String[] md5Indels = {"82e555b76c12474154f8e5e402516d73", "8e0e915a1cb63d7049e0671ed00101fe"}; WalkerTestSpec spec9 = new WalkerTestSpec( "-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", 2, diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java index 425ff9c45..f8e94f0ed 100755 --- 
a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java @@ -21,7 +21,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testVariantsToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("d1882fd8ecee6a95f561ed3be4d4a435"); + md5.add("0b96a8046d2a06bd87f57df8bac1678d"); /** * the above MD5 was calculated from running the following command: @@ -50,7 +50,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("debeaf31846328eddc0abf226fc72ac0"); + md5.add("09660faa7cfad8af36602f79461c0605"); /** * the above MD5 was calculated from running the following command: