Changes to VCF output, and updated MD5s in the integration tests

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1836 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-14 18:42:48 +00:00
parent 89771fef05
commit eb90e5c4d7
8 changed files with 240 additions and 147 deletions

View File

@ -59,7 +59,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
@Override @Override
public String toString() { public String toString() {
if (this.mCurrentRecord != null) if (this.mCurrentRecord != null)
return this.mCurrentRecord.toString(); return this.mCurrentRecord.toStringRepresentation(mReader.getHeader());
else else
return ""; return "";
} }

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.utils.genotype.vcf; package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
import java.io.File; import java.io.File;
@ -119,51 +117,35 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
lazyInitialize(genotypes, mFile, mStream); lazyInitialize(genotypes, mFile, mStream);
VCFParamters params = new VCFParamters(); VCFParameters params = new VCFParameters();
params.addFormatItem("GT"); params.addFormatItem("GT");
for (Genotype gtype : genotypes) { for (Genotype gtype : genotypes) {
// setup the parameters // setup the parameters
params.setLocations(gtype.getLocation(), gtype.getReference()); params.setLocations(gtype.getLocation(), gtype.getReference());
Map<String, String> map = new HashMap<String, String>(); VCFGenotypeRecord record = createVCFGenotypeRecord(params, gtype);
if (!(gtype instanceof SampleBacked)) {
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
}
// calculate the RMS mapping qualities and the read depth
if (gtype instanceof ReadBacked) {
int readDepth = ((ReadBacked) gtype).getReadCount();
map.put("RD", String.valueOf(readDepth));
params.addFormatItem("RD");
}
double qual = gtype.getNegLog10PError();
map.put("GQ", String.format("%.2f", qual));
params.addFormatItem("GQ");
List<String> alleles = new ArrayList<String>();
for (char allele : gtype.getBases().toCharArray()) {
alleles.add(String.valueOf(allele));
params.addAlternateBase(allele);
}
// TODO -- use the GenotypeMetaData object if it's not null
VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
alleles,
VCFGenotypeRecord.PHASE.UNPHASED,
map);
params.addGenotypeRecord(record); params.addGenotypeRecord(record);
} }
Map<String, String> infoFields = new HashMap<String, String>(); Map<String, String> infoFields = getInfoFields(metadata, params);
double qual = (metadata == null) ? 0 : (metadata.getLOD()) * 10;
/**
* TODO: Eric fix the next line when our LOD scores are 0->Inf based instead
* of -3 to Inf based.
*/
if (qual < 0.0) {
qual = 0.0;
}
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
params.getContig(), params.getContig(),
params.getPosition(), params.getPosition(),
".", ".",
params.getAlternateBases(), params.getAlternateBases(),
0, /* BETTER VALUE HERE */ qual,
".", ".",
infoFields, infoFields,
params.getFormatString(), params.getFormatString(),
@ -172,85 +154,79 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
mWriter.addRecord(vcfRecord); mWriter.addRecord(vcfRecord);
} }
/**
 * Builds the INFO field values of the VCF record from the call metadata and the
 * genotype records collected so far.
 *
 * @param metadata the metadata associated with this multi sample call; may be null,
 *                 in which case only the "NS" entry is produced
 * @param params   the parameters object holding the genotype records for this call
 *
 * @return a mapping of info field key to its formatted value
 */
private Map<String, String> getInfoFields(GenotypeMetaData metadata, VCFParameters params) {
    Map<String, String> infoFields = new HashMap<String, String>();
    if (metadata != null) {
        // Pin the locale: with the default locale a decimal-comma environment would emit
        // values such as "0,50", which is ambiguous in the VCF INFO column.
        infoFields.put("SB", String.format(java.util.Locale.US, "%.2f", metadata.getSLOD()));
        infoFields.put("AF", String.format(java.util.Locale.US, "%.2f", metadata.getAlleleFrequency()));
    }
    // NS is reported as the number of genotype records added to params
    infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size()));
    return infoFields;
}
/**
 * Creates the VCF genotype record for a single genotype and registers the format
 * items and alleles it uses with the parameters object.
 *
 * @param params the VCF parameters object; receives the "RD" (when available) and "GQ"
 *               format items plus every allele of the genotype as a candidate alternate base
 * @param gtype  the genotype; must implement SampleBacked
 *
 * @return a VCFGenotypeRecord, always marked as unphased
 *
 * @throws IllegalArgumentException if the genotype does not implement SampleBacked
 */
private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, Genotype gtype) {
    // validate before doing any work or touching params
    if (!(gtype instanceof SampleBacked)) {
        throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampleBacked interface");
    }

    Map<String, String> map = new HashMap<String, String>();

    // the read depth is only available for read-backed genotypes
    if (gtype instanceof ReadBacked) {
        int readDepth = ((ReadBacked) gtype).getReadCount();
        map.put("RD", String.valueOf(readDepth));
        params.addFormatItem("RD");
    }

    // Pin the locale so the decimal separator is always '.', whatever the JVM default is.
    double qual = gtype.getNegLog10PError();
    map.put("GQ", String.format(java.util.Locale.US, "%.2f", qual));
    params.addFormatItem("GQ");

    // every allele is offered as an alternate base; params decides which ones to keep
    List<String> alleles = createAlleleArray(gtype);
    for (String allele : alleles) {
        params.addAlternateBase(allele);
    }

    return new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
                                 alleles,
                                 VCFGenotypeRecord.PHASE.UNPHASED,
                                 map);
}
/**
 * Splits the bases of a genotype into a list of single-character allele strings.
 *
 * @param gtype the genotype object
 *
 * @return a list with one string per character of gtype.getBases(), in the same order
 */
private List<String> createAlleleArray(Genotype gtype) {
    List<String> result = new ArrayList<String>();
    final char[] bases = gtype.getBases().toCharArray();
    for (int i = 0; i < bases.length; i++) {
        result.add(String.valueOf(bases[i]));
    }
    return result;
}
/** @return true if we support multisample, false otherwise */ /** @return true if we support multisample, false otherwise */
@Override @Override
public boolean supportsMultiSample() { public boolean supportsMultiSample() {
return true; return true;
} }
/**
 * a helper class, which performs a lot of the safety checks on the parameters
 * we feed to the VCF (like ensuring the same position for each genotype in a call).
 */
class VCFParamters {
    private char referenceBase = '0';
    private int position = 0;
    private String contig = null;
    private boolean initialized = false;
    private final List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
    private final List<String> formatList = new ArrayList<String>();
    private final List<String> alternateBases = new ArrayList<String>();

    /**
     * Records the locus of a genotype. The first call fixes the contig, position and
     * reference base; every later call must describe the same locus.
     *
     * @param location the location of the genotype; must span a single base
     * @param refBase  the reference base at that location
     *
     * @throws IllegalArgumentException if the location spans more than one base, or if it
     *                                  disagrees with the locus recorded by the first call
     */
    public void setLocations(GenomeLoc location, char refBase) {
        // validate before mutating, so a rejected location never leaves a half-initialized object
        if (location.getStart() != location.getStop()) {
            throw new IllegalArgumentException("The start and stop locations must be the same");
        }
        // if we haven't set it up, we initialize the object
        if (!initialized) {
            initialized = true;
            this.contig = location.getContig();
            this.position = (int) location.getStart();
            this.referenceBase = refBase;
        } else {
            // compare the incoming location against the stored locus (the previous code
            // compared each field with itself, so these checks could never fail)
            if (!location.getContig().equals(this.contig))
                throw new IllegalArgumentException("The contig name has to be the same at a single locus");
            if (location.getStart() != this.position)
                throw new IllegalArgumentException("The position has to be the same at a single locus");
            if (refBase != this.referenceBase)
                throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
        }
    }

    /** @return get the position */
    public int getPosition() {
        return position;
    }

    /** @return get the contig name */
    public String getContig() {
        return contig;
    }

    /** @return get the reference base */
    public char getReferenceBase() {
        return referenceBase;
    }

    /** @param record the genotype record to append to this call */
    public void addGenotypeRecord(VCFGenotypeRecord record) {
        this.genotypesRecord.add(record);
    }

    /** @param item the format key to add; keys already present are ignored */
    public void addFormatItem(String item) {
        if (!formatList.contains(item))
            formatList.add(item);
    }

    /**
     * @param base a candidate alternate base; ignored when it equals the reference base
     *             or has already been added
     */
    public void addAlternateBase(char base) {
        String asString = String.valueOf(base);
        if (!alternateBases.contains(asString) && base != this.getReferenceBase())
            alternateBases.add(asString);
    }

    /** @return the alternate bases, in the order they were first added */
    public List<String> getAlternateBases() {
        return alternateBases;
    }

    /** @return the format keys joined with ";", in the order they were first added */
    public String getFormatString() {
        return Utils.join(";", formatList);
    }

    /** @return the genotype records added so far */
    public List<VCFGenotypeRecord> getGenotypesRecords() {
        return genotypesRecord;
    }
}
} }

View File

@ -0,0 +1,83 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.ArrayList;
/**
 * a helper class, which performs a lot of the safety checks on the parameters
 * we feed to the VCF (like ensuring the same position for each genotype in a call).
 */
class VCFParameters {
    private char referenceBase = '0';
    private int position = 0;
    private String contig = null;
    private boolean initialized = false;
    private final List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
    private final List<String> formatList = new ArrayList<String>();
    private final List<String> alternateBases = new ArrayList<String>();

    /**
     * Records the locus of a genotype. The first call fixes the contig, position and
     * reference base; every later call must describe the same locus.
     *
     * @param location the location of the genotype; must span a single base
     * @param refBase  the reference base at that location
     *
     * @throws IllegalArgumentException if the location spans more than one base, or if it
     *                                  disagrees with the locus recorded by the first call
     */
    public void setLocations(GenomeLoc location, char refBase) {
        // validate before mutating, so a rejected location never leaves a half-initialized object
        if (location.getStart() != location.getStop()) {
            throw new IllegalArgumentException("The start and stop locations must be the same");
        }
        // if we haven't set it up, we initialize the object
        if (!initialized) {
            initialized = true;
            this.contig = location.getContig();
            this.position = (int) location.getStart();
            this.referenceBase = refBase;
        } else {
            // compare the incoming location against the stored locus (the previous code
            // compared each field with itself, so these checks could never fail)
            if (!location.getContig().equals(this.contig))
                throw new IllegalArgumentException("The contig name has to be the same at a single locus");
            if (location.getStart() != this.position)
                throw new IllegalArgumentException("The position has to be the same at a single locus");
            if (refBase != this.referenceBase)
                throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
        }
    }

    /** @return get the position */
    public int getPosition() {
        return position;
    }

    /** @return get the contig name */
    public String getContig() {
        return contig;
    }

    /** @return get the reference base */
    public char getReferenceBase() {
        return referenceBase;
    }

    /** @param record the genotype record to append to this call */
    public void addGenotypeRecord(VCFGenotypeRecord record) {
        this.genotypesRecord.add(record);
    }

    /** @param item the format key to add; keys already present are ignored */
    public void addFormatItem(String item) {
        if (!formatList.contains(item))
            formatList.add(item);
    }

    /**
     * @param base a candidate alternate base; ignored when it equals the reference base
     *             or has already been added
     */
    public void addAlternateBase(String base) {
        // Compare by value: '!=' on strings tests object identity, and a freshly built
        // String.valueOf(...) is never identical to the argument, so the reference base
        // used to be accepted as an alternate allele.
        String reference = String.valueOf(this.getReferenceBase());
        if (!reference.equals(base) && !alternateBases.contains(base))
            alternateBases.add(base);
    }

    /** @return the alternate bases, in the order they were first added */
    public List<String> getAlternateBases() {
        return alternateBases;
    }

    /**
     * NOTE(review): the keys are joined with ";" while the genotype columns elsewhere in this
     * change are joined with ":" - confirm which separator the record writer expects.
     *
     * @return the format keys joined with ";", in the order they were first added
     */
    public String getFormatString() {
        return Utils.join(";", formatList);
    }

    /** @return the genotype records added so far */
    public List<VCFGenotypeRecord> getGenotypesRecords() {
        return genotypesRecord;
    }
}

View File

@ -19,7 +19,7 @@ public class VCFRecord {
// the alternate bases // the alternate bases
private final List<String> mAlts = new ArrayList<String>(); private final List<String> mAlts = new ArrayList<String>();
// our qual value // our qual value
private int mQual; private double mQual;
// our filter string // our filter string
private String mFilterString; private String mFilterString;
// our info fields // our info fields
@ -61,7 +61,7 @@ public class VCFRecord {
int position, int position,
String ID, String ID,
List<String> altBases, List<String> altBases,
int qual, double qual,
String filters, String filters,
Map<String, String> infoFields, Map<String, String> infoFields,
String genotypeFormatString, String genotypeFormatString,
@ -117,7 +117,7 @@ public class VCFRecord {
addAlternateBase(alt); addAlternateBase(alt);
break; break;
case QUAL: case QUAL:
this.setQual(Integer.valueOf(columnValues.get(val))); this.setQual(Double.valueOf(columnValues.get(val)));
break; break;
case FILTER: case FILTER:
this.setFilterString(columnValues.get(val)); this.setFilterString(columnValues.get(val));
@ -191,7 +191,7 @@ public class VCFRecord {
} }
/** @return the phred-scaled quality score */ /** @return the phred-scaled quality score */
public int getQual() { public double getQual() {
return this.mQual; return this.mQual;
} }
@ -273,7 +273,7 @@ public class VCFRecord {
this.mID = mID; this.mID = mID;
} }
public void setQual(int mQual) { public void setQual(double mQual) {
if (mQual < 0) if (mQual < 0)
throw new IllegalArgumentException("Qual values must be greater than 0"); throw new IllegalArgumentException("Qual values must be greater than 0");
this.mQual = mQual; this.mQual = mQual;
@ -307,12 +307,12 @@ public class VCFRecord {
/** /**
* the generation of a string representation, which is used by the VCF writer * the generation of a string representation, which is used by the VCF writer
*
* @return a string * @return a string
*/ */
public String toString() { public String toStringRepresentation(VCFHeader header) {
StringBuilder builder = new StringBuilder(); StringBuilder builder = new StringBuilder();
// else builder.append(FIELD_SEPERATOR + record.getValue(field));
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
builder.append(getChromosome() + FIELD_SEPERATOR); builder.append(getChromosome() + FIELD_SEPERATOR);
builder.append(getPosition() + FIELD_SEPERATOR); builder.append(getPosition() + FIELD_SEPERATOR);
@ -321,7 +321,7 @@ public class VCFRecord {
String alts = ""; String alts = "";
for (String str : this.getAlternateAlleles()) alts += str + ","; for (String str : this.getAlternateAlleles()) alts += str + ",";
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR); builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
builder.append(getQual() + FIELD_SEPERATOR); builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR);
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR); builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
String info = ""; String info = "";
for (String str : this.getInfoValues().keySet()) { for (String str : this.getInfoValues().keySet()) {
@ -335,9 +335,30 @@ public class VCFRecord {
else builder.append(info); else builder.append(info);
if (this.hasGenotypeData()) { if (this.hasGenotypeData()) {
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString()); addGenotypeData(builder, header);
for (VCFGenotypeRecord rec : this.getVCFGenotypeRecords()) { }
builder.append(FIELD_SEPERATOR); return builder.toString();
}
/**
* add the genotype data
*
* @param builder the string builder
* @param header the header object
*/
private void addGenotypeData(StringBuilder builder, VCFHeader header) {
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString());
if (header.getGenotypeSamples().size() < getVCFGenotypeRecords().size())
throw new RuntimeException("We have more genotype samples than the header specified");
Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getVCFGenotypeRecords());
for (String genotype : header.getGenotypeSamples()) {
builder.append(FIELD_SEPERATOR);
if (gMap.containsKey(genotype)) {
VCFGenotypeRecord rec = gMap.get(genotype);
if (!rec.toGenotypeString(this.mAlts).equals("")) if (!rec.toGenotypeString(this.mAlts).equals(""))
builder.append(rec.toGenotypeString(this.mAlts)); builder.append(rec.toGenotypeString(this.mAlts));
for (String s : rec.getFields().keySet()) { for (String s : rec.getFields().keySet()) {
@ -345,9 +366,14 @@ public class VCFRecord {
builder.append(":"); builder.append(":");
builder.append(rec.getFields().get(s)); builder.append(rec.getFields().get(s));
} }
gMap.remove(genotype);
} else {
builder.append(".");
} }
} }
return builder.toString(); if (gMap.size() != 0) {
throw new RuntimeException("We failed to use all the genotype samples; their must be an incosistancy between the header and records");
}
} }
/** /**
@ -370,4 +396,19 @@ public class VCFRecord {
return true; return true;
} }
/**
 * Indexes a list of genotype records by their sample name.
 *
 * @param list a list of genotype samples
 *
 * @return a mapping of the sample name to VCF genotype record; when several records
 *         share a sample name, the one appearing last in the list is kept
 */
private static Map<String, VCFGenotypeRecord> genotypeListToMap(List<VCFGenotypeRecord> list) {
    final Map<String, VCFGenotypeRecord> bySampleName = new HashMap<String, VCFGenotypeRecord>();
    for (final VCFGenotypeRecord genotypeRecord : list) {
        final String sampleName = genotypeRecord.getSampleName();
        bySampleName.put(sampleName, genotypeRecord);
    }
    return bySampleName;
}
} }

View File

@ -1,10 +1,7 @@
package org.broadinstitute.sting.utils.genotype.vcf; package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.StingException;
import java.io.*; import java.io.*;
import java.nio.charset.Charset;
/** /**
* this class writers VCF files * this class writers VCF files
@ -75,11 +72,7 @@ public class VCFWriter {
* @param record the record to output * @param record the record to output
*/ */
public void addRecord(VCFRecord record) { public void addRecord(VCFRecord record) {
if (record.getColumnCount() != mHeader.getGenotypeSamples().size() + mHeader.getHeaderFields().size()) { String vcfString = record.toStringRepresentation(mHeader);
throw new RuntimeException("Record has " + record.getColumnCount() +
" columns, when is should have " + mHeader.getColumnCount());
}
String vcfString = record.toString();
try { try {
mWriter.write(vcfString + "\n"); mWriter.write(vcfString + "\n");
} catch (IOException e) { } catch (IOException e) {

View File

@ -33,7 +33,7 @@ public class RodVCFTest extends BaseTest {
private static IndexedFastaSequenceFile seq; private static IndexedFastaSequenceFile seq;
private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf"); private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");
private VCFHeader mHeader;
@BeforeClass @BeforeClass
public static void beforeTests() { public static void beforeTests() {
try { try {
@ -47,13 +47,13 @@ public class RodVCFTest extends BaseTest {
private RodVCF getVCFObject() { private RodVCF getVCFObject() {
RodVCF vcf = new RodVCF("VCF"); RodVCF vcf = new RodVCF("VCF");
VCFHeader header = null; mHeader = null;
try { try {
header = (VCFHeader) vcf.initialize(vcfFile); mHeader = (VCFHeader) vcf.initialize(vcfFile);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
fail("Unable to open VCF file"); fail("Unable to open VCF file");
} }
header.checkVCFVersion(); mHeader.checkVCFVersion();
return vcf; return vcf;
} }
@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest {
@Test @Test
public void testToString() { public void testToString() {
// slightly altered line, due to map ordering // slightly altered line, due to map ordering
String firstLine = "20\t14370\trs6054257\tG\tA\t29\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n"; String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
RodVCF vcf = getVCFObject(); RodVCF vcf = getVCFObject();
VCFReader reader = new VCFReader(vcfFile); VCFReader reader = new VCFReader(vcfFile);
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile); Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
@ -99,13 +99,13 @@ public class RodVCFTest extends BaseTest {
while (iter.hasNext()) { while (iter.hasNext()) {
VCFRecord rec1 = reader.next(); VCFRecord rec1 = reader.next();
VCFRecord rec2 = iter.next().mCurrentRecord; VCFRecord rec2 = iter.next().mCurrentRecord;
if (!rec1.toString().equals(rec2.toString())) { if (!rec1.toStringRepresentation(mHeader).equals(rec2.toStringRepresentation(mHeader))) {
fail("VCF record rec1.toString() != rec2.toString()"); fail("VCF record rec1.toString() != rec2.toString()");
} }
// verify the first line too // verify the first line too
if (first) { if (first) {
if (!firstLine.equals(rec1.toString() + "\n")) { if (!firstLine.equals(rec1.toStringRepresentation(mHeader) + "\n")) {
fail("VCF record rec1.toString() != expected string :\n" + rec1.toString() + firstLine); fail("VCF record rec1.toString() != expected string :\n" + rec1.toStringRepresentation(mHeader) + firstLine);
} }
first = false; first = false;
} }

View File

@ -8,63 +8,63 @@ import java.util.Arrays;
public class VariantFiltrationIntegrationTest extends WalkerTest { public class VariantFiltrationIntegrationTest extends WalkerTest {
@Test @Test
public void testIntervals() { public void testIntervals() {
String[] md5DoC = {"b222d15b300f989dd2a86ff1f500f64b", "21c8e1f9dc65fdfb39347547f9b04011"}; String[] md5DoC = {"c0a7e2fc07d565e633b3064f9f3cdaf5", "21c8e1f9dc65fdfb39347547f9b04011"};
WalkerTestSpec spec1 = new WalkerTestSpec( WalkerTestSpec spec1 = new WalkerTestSpec(
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5DoC)); Arrays.asList(md5DoC));
executeTest("testDoCFilter", spec1); executeTest("testDoCFilter", spec1);
String[] md5AlleleBalance = {"9a59d33b55e5bad0228f2d2d67d4c17d", "a13e4ce6260bf9f33ca99dc808b8e6ad"}; String[] md5AlleleBalance = {"aa0f7800cfd346236620ae0eac220817", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
WalkerTestSpec spec2 = new WalkerTestSpec( WalkerTestSpec spec2 = new WalkerTestSpec(
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5AlleleBalance)); Arrays.asList(md5AlleleBalance));
executeTest("testAlleleBalanceFilter", spec2); executeTest("testAlleleBalanceFilter", spec2);
String[] md5Strand = {"b0a6fb821be2f7b26f8f6d77cbd758a9", "0f7db0aad764268ee8fa3b857df8d87d"}; String[] md5Strand = {"9f430f251dbeb58a2f80a1306a5dd492", "0f7db0aad764268ee8fa3b857df8d87d"};
WalkerTestSpec spec3 = new WalkerTestSpec( WalkerTestSpec spec3 = new WalkerTestSpec(
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5Strand)); Arrays.asList(md5Strand));
executeTest("testStrandFilter", spec3); executeTest("testStrandFilter", spec3);
String[] md5Lod = {"60624843c4c8ae561acc444df565da99", "7e0c4f2b0fda85fd2891eee76c396a55"}; String[] md5Lod = {"56177258c0b3944c043f86faee4b42ae", "7e0c4f2b0fda85fd2891eee76c396a55"};
WalkerTestSpec spec4 = new WalkerTestSpec( WalkerTestSpec spec4 = new WalkerTestSpec(
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5Lod)); Arrays.asList(md5Lod));
executeTest("testLodFilter", spec4); executeTest("testLodFilter", spec4);
String[] md5MQ0 = {"5e3d4d6b13e79a5df5171d3e5a9f1bd7", "3203de335621851bccf596242b079e23"}; String[] md5MQ0 = {"0e303c32f5c1503f4c875771f28fc46c", "3203de335621851bccf596242b079e23"};
WalkerTestSpec spec5 = new WalkerTestSpec( WalkerTestSpec spec5 = new WalkerTestSpec(
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5MQ0)); Arrays.asList(md5MQ0));
executeTest("testMappingQuality0Filter", spec5); executeTest("testMappingQuality0Filter", spec5);
String[] md5MQ = {"fdbac9cf332dd45d9c92146157ace65f", "ecc777feedea61f7b570d114c2ab89b1"}; String[] md5MQ = {"946462a6199e9453784e0942e18e6830", "ecc777feedea61f7b570d114c2ab89b1"};
WalkerTestSpec spec6 = new WalkerTestSpec( WalkerTestSpec spec6 = new WalkerTestSpec(
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5MQ)); Arrays.asList(md5MQ));
executeTest("testRMSMappingQualityFilter", spec6); executeTest("testRMSMappingQualityFilter", spec6);
String[] md5OnOff = {"57c5a92bde03adbff9c6ca6eada033c4", "67f2e1bc025833b0fa31f47195198997"}; String[] md5OnOff = {"2ff84e104ce73e347e55d272170b4d03", "67f2e1bc025833b0fa31f47195198997"};
WalkerTestSpec spec7 = new WalkerTestSpec( WalkerTestSpec spec7 = new WalkerTestSpec(
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5OnOff)); Arrays.asList(md5OnOff));
executeTest("testOnOffGenotypeFilter", spec7); executeTest("testOnOffGenotypeFilter", spec7);
String[] md5Clusters = {"44223fa50dac2d9c1096558689cb8493", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"}; String[] md5Clusters = {"e6a1c088678b1c31ff340ebd622b476e", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
WalkerTestSpec spec8 = new WalkerTestSpec( WalkerTestSpec spec8 = new WalkerTestSpec(
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,
Arrays.asList(md5Clusters)); Arrays.asList(md5Clusters));
executeTest("testClusteredSnpsFilter", spec8); executeTest("testClusteredSnpsFilter", spec8);
String[] md5Indels = {"0f03727ac9e6fc43311377b29d12596c", "8e0e915a1cb63d7049e0671ed00101fe"}; String[] md5Indels = {"82e555b76c12474154f8e5e402516d73", "8e0e915a1cb63d7049e0671ed00101fe"};
WalkerTestSpec spec9 = new WalkerTestSpec( WalkerTestSpec spec9 = new WalkerTestSpec(
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878", "-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
2, 2,

View File

@ -21,7 +21,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test @Test
public void testVariantsToVCFUsingGeliInput() { public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>(); List<String> md5 = new ArrayList<String>();
md5.add("d1882fd8ecee6a95f561ed3be4d4a435"); md5.add("0b96a8046d2a06bd87f57df8bac1678d");
/** /**
* the above MD5 was calculated from running the following command: * the above MD5 was calculated from running the following command:
@ -50,7 +50,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test @Test
public void testGenotypesToVCFUsingGeliInput() { public void testGenotypesToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>(); List<String> md5 = new ArrayList<String>();
md5.add("debeaf31846328eddc0abf226fc72ac0"); md5.add("09660faa7cfad8af36602f79461c0605");
/** /**
* the above MD5 was calculated from running the following command: * the above MD5 was calculated from running the following command: