Changes to VCF output, and updated MD5s in the integration tests

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1836 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-14 18:42:48 +00:00
parent 89771fef05
commit eb90e5c4d7
8 changed files with 240 additions and 147 deletions

View File

@ -59,7 +59,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
@Override
public String toString() {
if (this.mCurrentRecord != null)
return this.mCurrentRecord.toString();
return this.mCurrentRecord.toStringRepresentation(mReader.getHeader());
else
return "";
}

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.File;
@ -119,51 +117,35 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
lazyInitialize(genotypes, mFile, mStream);
VCFParamters params = new VCFParamters();
VCFParameters params = new VCFParameters();
params.addFormatItem("GT");
for (Genotype gtype : genotypes) {
// setup the parameters
params.setLocations(gtype.getLocation(), gtype.getReference());
Map<String, String> map = new HashMap<String, String>();
if (!(gtype instanceof SampleBacked)) {
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
}
// calculate the RMS mapping qualities and the read depth
if (gtype instanceof ReadBacked) {
int readDepth = ((ReadBacked) gtype).getReadCount();
map.put("RD", String.valueOf(readDepth));
params.addFormatItem("RD");
}
double qual = gtype.getNegLog10PError();
map.put("GQ", String.format("%.2f", qual));
params.addFormatItem("GQ");
List<String> alleles = new ArrayList<String>();
for (char allele : gtype.getBases().toCharArray()) {
alleles.add(String.valueOf(allele));
params.addAlternateBase(allele);
}
// TODO -- use the GenotypeMetaData object if it's not null
VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
alleles,
VCFGenotypeRecord.PHASE.UNPHASED,
map);
VCFGenotypeRecord record = createVCFGenotypeRecord(params, gtype);
params.addGenotypeRecord(record);
}
Map<String, String> infoFields = new HashMap<String, String>();
Map<String, String> infoFields = getInfoFields(metadata, params);
double qual = (metadata == null) ? 0 : (metadata.getLOD()) * 10;
/**
* TODO: Eric fix the next line when our LOD scores are 0->Inf based instead
* of -3 to Inf based.
*/
if (qual < 0.0) {
qual = 0.0;
}
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
params.getContig(),
params.getPosition(),
".",
params.getAlternateBases(),
0, /* BETTER VALUE HERE */
qual,
".",
infoFields,
params.getFormatString(),
@ -172,85 +154,79 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
mWriter.addRecord(vcfRecord);
}
/**
 * Assemble the INFO-column key/value pairs for one VCF record.
 * <p>
 * "SB" and "AF" are only emitted when call-level metadata is supplied; they are the
 * metadata's SLOD and allele frequency, each formatted to two decimal places.
 * "NS" is always emitted and is the number of genotype records collected so far in
 * {@code params}.
 *
 * @param metadata the metadata associated with this multi sample call; may be null
 * @param params   the parameters accumulated for the current locus
 *
 * @return a mutable mapping of info field name to its string value
 */
private Map<String, String> getInfoFields(GenotypeMetaData metadata, VCFParameters params) {
    final Map<String, String> fields = new HashMap<String, String>();

    final boolean haveMetadata = (metadata != null);
    if (haveMetadata) {
        final String sbValue = String.format("%.2f", metadata.getSLOD());
        fields.put("SB", sbValue);
        final String afValue = String.format("%.2f", metadata.getAlleleFrequency());
        fields.put("AF", afValue);
    }

    final int sampleCount = params.getGenotypesRecords().size();
    fields.put("NS", String.valueOf(sampleCount));
    return fields;
}
/**
 * Build the per-sample VCF genotype record for a single genotype call.
 * <p>
 * Besides building the record, this registers on {@code params} every format key it
 * emits ("RD" when the genotype is read backed, then "GQ") and offers each allele of
 * the genotype to {@code params} as a candidate alternate base.
 *
 * @param params the VCF parameters object for the current locus; updated as a side effect
 * @param gtype  the genotype; must implement SampleBacked
 *
 * @return a VCFGenotypeRecord, always marked unphased
 *
 * @throws IllegalArgumentException if the genotype is not SampleBacked
 */
private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, Genotype gtype) {
    // the record is keyed by sample name, so a genotype that cannot supply one is rejected up front
    if (!(gtype instanceof SampleBacked)) {
        throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
    }

    final Map<String, String> fields = new HashMap<String, String>();

    // read depth is only available from read-backed genotypes
    if (gtype instanceof ReadBacked) {
        final int depth = ((ReadBacked) gtype).getReadCount();
        fields.put("RD", String.valueOf(depth));
        params.addFormatItem("RD");
    }

    // genotype quality: the genotype's -log10 error probability, two decimal places
    final double negLog10PError = gtype.getNegLog10PError();
    fields.put("GQ", String.format("%.2f", negLog10PError));
    params.addFormatItem("GQ");

    // every allele is offered as an alternate; params decides which ones to keep
    final List<String> alleles = createAlleleArray(gtype);
    for (String candidate : alleles) {
        params.addAlternateBase(candidate);
    }

    return new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
                                 alleles,
                                 VCFGenotypeRecord.PHASE.UNPHASED,
                                 fields);
}
/**
 * Split a genotype's bases into a list of single-character allele strings,
 * preserving their order.
 *
 * @param gtype the genotype object
 *
 * @return a list with one one-character string per base of the genotype
 */
private List<String> createAlleleArray(Genotype gtype) {
    final List<String> result = new ArrayList<String>();
    final char[] bases = gtype.getBases().toCharArray();
    for (int i = 0; i < bases.length; i++) {
        result.add(String.valueOf(bases[i]));
    }
    return result;
}
/**
 * Reports whether this writer can accept calls covering more than one sample.
 *
 * @return always true for this writer
 */
@Override
public boolean supportsMultiSample() {
return true;
}
/**
 * a helper class, which performs a lot of the safety checks on the parameters
 * we feed to the VCF (like ensuring the same position for each genotype in a call).
 * <p>
 * The first call to {@link #setLocations} fixes the contig, position and reference
 * base for the locus; every later call must agree with those values.
 */
class VCFParamters {
    private char referenceBase = '0';
    private int position = 0;
    private String contig = null;
    private boolean initialized = false;
    private List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
    private List<String> formatList = new ArrayList<String>();
    private List<String> alternateBases = new ArrayList<String>();

    /**
     * Record the locus of a genotype, or verify that it matches the locus already recorded.
     *
     * @param location the location of the genotype; must span exactly one base
     * @param refBase  the reference base at that location
     *
     * @throws IllegalArgumentException if the location spans more than one base, or if the
     *                                  contig, position or reference base differs from the
     *                                  values recorded by the first call
     */
    public void setLocations(GenomeLoc location, char refBase) {
        // validate before touching any state, so a rejected call leaves this object unchanged
        if (location.getStart() != location.getStop()) {
            throw new IllegalArgumentException("The start and stop locations must be the same");
        }
        final String incomingContig = location.getContig();
        final int incomingPosition = (int) location.getStart();

        if (!initialized) {
            this.contig = incomingContig;
            this.position = incomingPosition;
            this.referenceBase = refBase;
            initialized = true;
        } else {
            // compare the INCOMING values against the stored ones; comparing the fields
            // with themselves (as this code once did) can never detect a mismatch
            final boolean sameContig = (this.contig == null)
                    ? (incomingContig == null)
                    : this.contig.equals(incomingContig);
            if (!sameContig)
                throw new IllegalArgumentException("The contig name has to be the same at a single locus");
            if (incomingPosition != this.position)
                throw new IllegalArgumentException("The position has to be the same at a single locus");
            if (refBase != this.referenceBase)
                throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
        }
    }

    /** @return get the position */
    public int getPosition() {
        return position;
    }

    /** @return get the contig name */
    public String getContig() {
        return contig;
    }

    /** @return get the reference base */
    public char getReferenceBase() {
        return referenceBase;
    }

    /**
     * Append a genotype record to those collected for this locus.
     *
     * @param record the genotype record to add
     */
    public void addGenotypeRecord(VCFGenotypeRecord record) {
        this.genotypesRecord.add(record);
    }

    /**
     * Register a genotype format key, keeping first-seen order and ignoring repeats.
     *
     * @param item the format key, e.g. "GT"
     */
    public void addFormatItem(String item) {
        if (!formatList.contains(item))
            formatList.add(item);
    }

    /**
     * Register a base as an alternate allele unless it is the reference base or
     * has already been registered.
     *
     * @param base the candidate alternate base
     */
    public void addAlternateBase(char base) {
        final String asString = String.valueOf(base);
        if (base != this.getReferenceBase() && !alternateBases.contains(asString))
            alternateBases.add(asString);
    }

    /** @return the alternate bases registered so far (the live internal list) */
    public List<String> getAlternateBases() {
        return alternateBases;
    }

    /** @return the registered format keys joined with ";" */
    public String getFormatString() {
        // NOTE(review): the VCF FORMAT column is ':'-delimited (e.g. "GT:GQ:DP:HQ"); confirm
        // that the consumer of this string really expects ';' before changing the separator
        return Utils.join(";", formatList);
    }

    /** @return the genotype records collected so far (the live internal list) */
    public List<VCFGenotypeRecord> getGenotypesRecords() {
        return genotypesRecord;
    }
}
}

View File

@ -0,0 +1,83 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.ArrayList;
/**
 * a helper class, which performs a lot of the safety checks on the parameters
 * we feed to the VCF (like ensuring the same position for each genotype in a call).
 * <p>
 * The first call to {@link #setLocations} fixes the contig, position and reference
 * base for the locus; every later call must agree with those values.
 */
class VCFParameters {
    private char referenceBase = '0';
    private int position = 0;
    private String contig = null;
    private boolean initialized = false;
    private List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
    private List<String> formatList = new ArrayList<String>();
    private List<String> alternateBases = new ArrayList<String>();

    /**
     * Record the locus of a genotype, or verify that it matches the locus already recorded.
     *
     * @param location the location of the genotype; must span exactly one base
     * @param refBase  the reference base at that location
     *
     * @throws IllegalArgumentException if the location spans more than one base, or if the
     *                                  contig, position or reference base differs from the
     *                                  values recorded by the first call
     */
    public void setLocations(GenomeLoc location, char refBase) {
        // validate before touching any state, so a rejected call leaves this object unchanged
        if (location.getStart() != location.getStop()) {
            throw new IllegalArgumentException("The start and stop locations must be the same");
        }
        final String incomingContig = location.getContig();
        final int incomingPosition = (int) location.getStart();

        if (!initialized) {
            this.contig = incomingContig;
            this.position = incomingPosition;
            this.referenceBase = refBase;
            initialized = true;
        } else {
            // compare the INCOMING values against the stored ones; comparing the fields
            // with themselves (as this code once did) can never detect a mismatch
            final boolean sameContig = (this.contig == null)
                    ? (incomingContig == null)
                    : this.contig.equals(incomingContig);
            if (!sameContig)
                throw new IllegalArgumentException("The contig name has to be the same at a single locus");
            if (incomingPosition != this.position)
                throw new IllegalArgumentException("The position has to be the same at a single locus");
            if (refBase != this.referenceBase)
                throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
        }
    }

    /** @return get the position */
    public int getPosition() {
        return position;
    }

    /** @return get the contig name */
    public String getContig() {
        return contig;
    }

    /** @return get the reference base */
    public char getReferenceBase() {
        return referenceBase;
    }

    /**
     * Append a genotype record to those collected for this locus.
     *
     * @param record the genotype record to add
     */
    public void addGenotypeRecord(VCFGenotypeRecord record) {
        this.genotypesRecord.add(record);
    }

    /**
     * Register a genotype format key, keeping first-seen order and ignoring repeats.
     *
     * @param item the format key, e.g. "GT"
     */
    public void addFormatItem(String item) {
        if (!formatList.contains(item))
            formatList.add(item);
    }

    /**
     * Register an allele as an alternate unless it is the reference base or has
     * already been registered.
     *
     * @param base the candidate alternate allele
     */
    public void addAlternateBase(String base) {
        // String content must be compared with equals(); '!=' compares object identity and is
        // always true against a freshly built String, which let the reference base through
        final String reference = String.valueOf(this.getReferenceBase());
        if (!reference.equals(base) && !alternateBases.contains(base))
            alternateBases.add(base);
    }

    /** @return the alternate alleles registered so far (the live internal list) */
    public List<String> getAlternateBases() {
        return alternateBases;
    }

    /** @return the registered format keys joined with ";" */
    public String getFormatString() {
        // NOTE(review): the VCF FORMAT column is ':'-delimited (e.g. "GT:GQ:DP:HQ"); confirm
        // that the consumer of this string really expects ';' before changing the separator
        return Utils.join(";", formatList);
    }

    /** @return the genotype records collected so far (the live internal list) */
    public List<VCFGenotypeRecord> getGenotypesRecords() {
        return genotypesRecord;
    }
}

View File

@ -19,7 +19,7 @@ public class VCFRecord {
// the alternate bases
private final List<String> mAlts = new ArrayList<String>();
// our qual value
private int mQual;
private double mQual;
// our filter string
private String mFilterString;
// our info fields
@ -61,7 +61,7 @@ public class VCFRecord {
int position,
String ID,
List<String> altBases,
int qual,
double qual,
String filters,
Map<String, String> infoFields,
String genotypeFormatString,
@ -117,7 +117,7 @@ public class VCFRecord {
addAlternateBase(alt);
break;
case QUAL:
this.setQual(Integer.valueOf(columnValues.get(val)));
this.setQual(Double.valueOf(columnValues.get(val)));
break;
case FILTER:
this.setFilterString(columnValues.get(val));
@ -191,7 +191,7 @@ public class VCFRecord {
}
/** @return the phred-scaled quality score */
public int getQual() {
public double getQual() {
return this.mQual;
}
@ -273,7 +273,7 @@ public class VCFRecord {
this.mID = mID;
}
public void setQual(int mQual) {
public void setQual(double mQual) {
if (mQual < 0)
throw new IllegalArgumentException("Qual values must be greater than 0");
this.mQual = mQual;
@ -307,12 +307,12 @@ public class VCFRecord {
/**
* the generation of a string representation, which is used by the VCF writer
*
* @return a string
*/
public String toString() {
public String toStringRepresentation(VCFHeader header) {
StringBuilder builder = new StringBuilder();
// else builder.append(FIELD_SEPERATOR + record.getValue(field));
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
builder.append(getChromosome() + FIELD_SEPERATOR);
builder.append(getPosition() + FIELD_SEPERATOR);
@ -321,7 +321,7 @@ public class VCFRecord {
String alts = "";
for (String str : this.getAlternateAlleles()) alts += str + ",";
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
builder.append(getQual() + FIELD_SEPERATOR);
builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR);
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
String info = "";
for (String str : this.getInfoValues().keySet()) {
@ -335,9 +335,30 @@ public class VCFRecord {
else builder.append(info);
if (this.hasGenotypeData()) {
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString());
for (VCFGenotypeRecord rec : this.getVCFGenotypeRecords()) {
builder.append(FIELD_SEPERATOR);
addGenotypeData(builder, header);
}
return builder.toString();
}
/**
* add the genotype data
*
* @param builder the string builder
* @param header the header object
*/
private void addGenotypeData(StringBuilder builder, VCFHeader header) {
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString());
if (header.getGenotypeSamples().size() < getVCFGenotypeRecords().size())
throw new RuntimeException("We have more genotype samples than the header specified");
Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getVCFGenotypeRecords());
for (String genotype : header.getGenotypeSamples()) {
builder.append(FIELD_SEPERATOR);
if (gMap.containsKey(genotype)) {
VCFGenotypeRecord rec = gMap.get(genotype);
if (!rec.toGenotypeString(this.mAlts).equals(""))
builder.append(rec.toGenotypeString(this.mAlts));
for (String s : rec.getFields().keySet()) {
@ -345,9 +366,14 @@ public class VCFRecord {
builder.append(":");
builder.append(rec.getFields().get(s));
}
gMap.remove(genotype);
} else {
builder.append(".");
}
}
return builder.toString();
if (gMap.size() != 0) {
throw new RuntimeException("We failed to use all the genotype samples; their must be an incosistancy between the header and records");
}
}
/**
@ -370,4 +396,19 @@ public class VCFRecord {
return true;
}
/**
 * Index a list of genotype records by sample name.
 * <p>
 * If two records report the same sample name, the one later in the list replaces
 * the earlier one.
 *
 * @param list a list of genotype records
 *
 * @return a new mutable mapping of the sample name to its VCF genotype record
 */
private static Map<String, VCFGenotypeRecord> genotypeListToMap(List<VCFGenotypeRecord> list) {
    final Map<String, VCFGenotypeRecord> bySample = new HashMap<String, VCFGenotypeRecord>();
    for (VCFGenotypeRecord genotype : list) {
        final String sampleName = genotype.getSampleName();
        bySample.put(sampleName, genotype);
    }
    return bySample;
}
}

View File

@ -1,10 +1,7 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.StingException;
import java.io.*;
import java.nio.charset.Charset;
/**
* this class writes VCF files
@ -75,11 +72,7 @@ public class VCFWriter {
* @param record the record to output
*/
public void addRecord(VCFRecord record) {
if (record.getColumnCount() != mHeader.getGenotypeSamples().size() + mHeader.getHeaderFields().size()) {
throw new RuntimeException("Record has " + record.getColumnCount() +
" columns, when is should have " + mHeader.getColumnCount());
}
String vcfString = record.toString();
String vcfString = record.toStringRepresentation(mHeader);
try {
mWriter.write(vcfString + "\n");
} catch (IOException e) {

View File

@ -33,7 +33,7 @@ public class RodVCFTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");
private VCFHeader mHeader;
@BeforeClass
public static void beforeTests() {
try {
@ -47,13 +47,13 @@ public class RodVCFTest extends BaseTest {
private RodVCF getVCFObject() {
RodVCF vcf = new RodVCF("VCF");
VCFHeader header = null;
mHeader = null;
try {
header = (VCFHeader) vcf.initialize(vcfFile);
mHeader = (VCFHeader) vcf.initialize(vcfFile);
} catch (FileNotFoundException e) {
fail("Unable to open VCF file");
}
header.checkVCFVersion();
mHeader.checkVCFVersion();
return vcf;
}
@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest {
@Test
public void testToString() {
// slightly altered line, due to map ordering
String firstLine = "20\t14370\trs6054257\tG\tA\t29\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
RodVCF vcf = getVCFObject();
VCFReader reader = new VCFReader(vcfFile);
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
@ -99,13 +99,13 @@ public class RodVCFTest extends BaseTest {
while (iter.hasNext()) {
VCFRecord rec1 = reader.next();
VCFRecord rec2 = iter.next().mCurrentRecord;
if (!rec1.toString().equals(rec2.toString())) {
if (!rec1.toStringRepresentation(mHeader).equals(rec2.toStringRepresentation(mHeader))) {
fail("VCF record rec1.toString() != rec2.toString()");
}
// verify the first line too
if (first) {
if (!firstLine.equals(rec1.toString() + "\n")) {
fail("VCF record rec1.toString() != expected string :\n" + rec1.toString() + firstLine);
if (!firstLine.equals(rec1.toStringRepresentation(mHeader) + "\n")) {
fail("VCF record rec1.toString() != expected string :\n" + rec1.toStringRepresentation(mHeader) + firstLine);
}
first = false;
}

View File

@ -8,63 +8,63 @@ import java.util.Arrays;
public class VariantFiltrationIntegrationTest extends WalkerTest {
@Test
public void testIntervals() {
String[] md5DoC = {"b222d15b300f989dd2a86ff1f500f64b", "21c8e1f9dc65fdfb39347547f9b04011"};
String[] md5DoC = {"c0a7e2fc07d565e633b3064f9f3cdaf5", "21c8e1f9dc65fdfb39347547f9b04011"};
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5DoC));
executeTest("testDoCFilter", spec1);
String[] md5AlleleBalance = {"9a59d33b55e5bad0228f2d2d67d4c17d", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
String[] md5AlleleBalance = {"aa0f7800cfd346236620ae0eac220817", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5AlleleBalance));
executeTest("testAlleleBalanceFilter", spec2);
String[] md5Strand = {"b0a6fb821be2f7b26f8f6d77cbd758a9", "0f7db0aad764268ee8fa3b857df8d87d"};
String[] md5Strand = {"9f430f251dbeb58a2f80a1306a5dd492", "0f7db0aad764268ee8fa3b857df8d87d"};
WalkerTestSpec spec3 = new WalkerTestSpec(
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5Strand));
executeTest("testStrandFilter", spec3);
String[] md5Lod = {"60624843c4c8ae561acc444df565da99", "7e0c4f2b0fda85fd2891eee76c396a55"};
String[] md5Lod = {"56177258c0b3944c043f86faee4b42ae", "7e0c4f2b0fda85fd2891eee76c396a55"};
WalkerTestSpec spec4 = new WalkerTestSpec(
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5Lod));
executeTest("testLodFilter", spec4);
String[] md5MQ0 = {"5e3d4d6b13e79a5df5171d3e5a9f1bd7", "3203de335621851bccf596242b079e23"};
String[] md5MQ0 = {"0e303c32f5c1503f4c875771f28fc46c", "3203de335621851bccf596242b079e23"};
WalkerTestSpec spec5 = new WalkerTestSpec(
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5MQ0));
executeTest("testMappingQuality0Filter", spec5);
String[] md5MQ = {"fdbac9cf332dd45d9c92146157ace65f", "ecc777feedea61f7b570d114c2ab89b1"};
String[] md5MQ = {"946462a6199e9453784e0942e18e6830", "ecc777feedea61f7b570d114c2ab89b1"};
WalkerTestSpec spec6 = new WalkerTestSpec(
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5MQ));
executeTest("testRMSMappingQualityFilter", spec6);
String[] md5OnOff = {"57c5a92bde03adbff9c6ca6eada033c4", "67f2e1bc025833b0fa31f47195198997"};
String[] md5OnOff = {"2ff84e104ce73e347e55d272170b4d03", "67f2e1bc025833b0fa31f47195198997"};
WalkerTestSpec spec7 = new WalkerTestSpec(
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5OnOff));
executeTest("testOnOffGenotypeFilter", spec7);
String[] md5Clusters = {"44223fa50dac2d9c1096558689cb8493", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
String[] md5Clusters = {"e6a1c088678b1c31ff340ebd622b476e", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
WalkerTestSpec spec8 = new WalkerTestSpec(
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,
Arrays.asList(md5Clusters));
executeTest("testClusteredSnpsFilter", spec8);
String[] md5Indels = {"0f03727ac9e6fc43311377b29d12596c", "8e0e915a1cb63d7049e0671ed00101fe"};
String[] md5Indels = {"82e555b76c12474154f8e5e402516d73", "8e0e915a1cb63d7049e0671ed00101fe"};
WalkerTestSpec spec9 = new WalkerTestSpec(
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2,

View File

@ -21,7 +21,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("d1882fd8ecee6a95f561ed3be4d4a435");
md5.add("0b96a8046d2a06bd87f57df8bac1678d");
/**
* the above MD5 was calculated from running the following command:
@ -50,7 +50,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();
md5.add("debeaf31846328eddc0abf226fc72ac0");
md5.add("09660faa7cfad8af36602f79461c0605");
/**
* the above MD5 was calculated from running the following command: