Changes to VCF output, and updated MD5s in the integration tests
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1836 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
89771fef05
commit
eb90e5c4d7
|
|
@ -59,7 +59,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
@Override
|
||||
public String toString() {
|
||||
if (this.mCurrentRecord != null)
|
||||
return this.mCurrentRecord.toString();
|
||||
return this.mCurrentRecord.toStringRepresentation(mReader.getHeader());
|
||||
else
|
||||
return "";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -119,51 +117,35 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
lazyInitialize(genotypes, mFile, mStream);
|
||||
|
||||
|
||||
VCFParamters params = new VCFParamters();
|
||||
VCFParameters params = new VCFParameters();
|
||||
params.addFormatItem("GT");
|
||||
|
||||
for (Genotype gtype : genotypes) {
|
||||
// setup the parameters
|
||||
params.setLocations(gtype.getLocation(), gtype.getReference());
|
||||
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
if (!(gtype instanceof SampleBacked)) {
|
||||
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
|
||||
}
|
||||
|
||||
// calculate the RMS mapping qualities and the read depth
|
||||
if (gtype instanceof ReadBacked) {
|
||||
int readDepth = ((ReadBacked) gtype).getReadCount();
|
||||
map.put("RD", String.valueOf(readDepth));
|
||||
params.addFormatItem("RD");
|
||||
}
|
||||
double qual = gtype.getNegLog10PError();
|
||||
map.put("GQ", String.format("%.2f", qual));
|
||||
params.addFormatItem("GQ");
|
||||
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
for (char allele : gtype.getBases().toCharArray()) {
|
||||
alleles.add(String.valueOf(allele));
|
||||
params.addAlternateBase(allele);
|
||||
}
|
||||
|
||||
// TODO -- use the GenotypeMetaData object if it's not null
|
||||
|
||||
VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
|
||||
alleles,
|
||||
VCFGenotypeRecord.PHASE.UNPHASED,
|
||||
map);
|
||||
VCFGenotypeRecord record = createVCFGenotypeRecord(params, gtype);
|
||||
params.addGenotypeRecord(record);
|
||||
}
|
||||
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
Map<String, String> infoFields = getInfoFields(metadata, params);
|
||||
|
||||
double qual = (metadata == null) ? 0 : (metadata.getLOD()) * 10;
|
||||
|
||||
/**
|
||||
* TODO: Eric fix the next line when our LOD scores are 0->Inf based instead
|
||||
* of -3 to Inf based.
|
||||
*/
|
||||
if (qual < 0.0) {
|
||||
qual = 0.0;
|
||||
}
|
||||
|
||||
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
|
||||
params.getContig(),
|
||||
params.getPosition(),
|
||||
".",
|
||||
params.getAlternateBases(),
|
||||
0, /* BETTER VALUE HERE */
|
||||
qual,
|
||||
".",
|
||||
infoFields,
|
||||
params.getFormatString(),
|
||||
|
|
@ -172,85 +154,79 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
mWriter.addRecord(vcfRecord);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the information fields of the VCF record, given the meta data and parameters
|
||||
*
|
||||
* @param metadata the metadata associated with this multi sample call
|
||||
* @param params the parameters
|
||||
*
|
||||
* @return a mapping of info field to value
|
||||
*/
|
||||
private Map<String, String> getInfoFields(GenotypeMetaData metadata, VCFParameters params) {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
if (metadata != null) {
|
||||
infoFields.put("SB", String.format("%.2f", metadata.getSLOD()));
|
||||
infoFields.put("AF", String.format("%.2f", metadata.getAlleleFrequency()));
|
||||
}
|
||||
infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size()));
|
||||
return infoFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the VCF genotype record
|
||||
*
|
||||
* @param params the VCF parameters object
|
||||
* @param gtype the genotype
|
||||
*
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, Genotype gtype) {
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
if (!(gtype instanceof SampleBacked)) {
|
||||
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
|
||||
}
|
||||
|
||||
// calculate the RMS mapping qualities and the read depth
|
||||
if (gtype instanceof ReadBacked) {
|
||||
int readDepth = ((ReadBacked) gtype).getReadCount();
|
||||
map.put("RD", String.valueOf(readDepth));
|
||||
params.addFormatItem("RD");
|
||||
}
|
||||
double qual = gtype.getNegLog10PError();
|
||||
map.put("GQ", String.format("%.2f", qual));
|
||||
params.addFormatItem("GQ");
|
||||
|
||||
List<String> alleles = createAlleleArray(gtype);
|
||||
for (String allele : alleles) {
|
||||
params.addAlternateBase(allele);
|
||||
}
|
||||
|
||||
VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
|
||||
alleles,
|
||||
VCFGenotypeRecord.PHASE.UNPHASED,
|
||||
map);
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the allele array?
|
||||
*
|
||||
* @param gtype the gentoype object
|
||||
*
|
||||
* @return a list of string representing the string array of alleles
|
||||
*/
|
||||
private List<String> createAlleleArray(Genotype gtype) {
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
for (char allele : gtype.getBases().toCharArray()) {
|
||||
alleles.add(String.valueOf(allele));
|
||||
}
|
||||
return alleles;
|
||||
}
|
||||
|
||||
/** @return true if we support multisample, false otherwise */
|
||||
@Override
|
||||
public boolean supportsMultiSample() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* a helper class, which performs a lot of the safety checks on the parameters
|
||||
* we feed to the VCF (like ensuring the same position for each genotype in a call).
|
||||
*/
|
||||
class VCFParamters {
|
||||
private char referenceBase = '0';
|
||||
private int position = 0;
|
||||
private String contig = null;
|
||||
private boolean initialized = false;
|
||||
private List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
|
||||
private List<String> formatList = new ArrayList<String>();
|
||||
private List<String> alternateBases = new ArrayList<String>();
|
||||
|
||||
public void setLocations(GenomeLoc location, char refBase) {
|
||||
// if we haven't set it up, we initialize the object
|
||||
if (!initialized) {
|
||||
initialized = true;
|
||||
this.contig = location.getContig();
|
||||
this.position = (int)location.getStart();
|
||||
if (location.getStart() != location.getStop()) {
|
||||
throw new IllegalArgumentException("The start and stop locations must be the same");
|
||||
}
|
||||
this.referenceBase = refBase;
|
||||
} else {
|
||||
if (!contig.equals(this.contig))
|
||||
throw new IllegalArgumentException("The contig name has to be the same at a single locus");
|
||||
if (position != this.position)
|
||||
throw new IllegalArgumentException("The position has to be the same at a single locus");
|
||||
if (refBase != this.referenceBase)
|
||||
throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
|
||||
}
|
||||
}
|
||||
|
||||
/** @return get the position */
|
||||
public int getPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
/** @return get the contig name */
|
||||
public String getContig() {
|
||||
return contig;
|
||||
}
|
||||
|
||||
/** @return get the reference base */
|
||||
public char getReferenceBase() {
|
||||
return referenceBase;
|
||||
}
|
||||
|
||||
public void addGenotypeRecord(VCFGenotypeRecord record) {
|
||||
this.genotypesRecord.add(record);
|
||||
}
|
||||
|
||||
public void addFormatItem(String item) {
|
||||
if (!formatList.contains(item))
|
||||
formatList.add(item);
|
||||
}
|
||||
|
||||
public void addAlternateBase(char base) {
|
||||
if (!alternateBases.contains(String.valueOf(base)) && base != this.getReferenceBase())
|
||||
alternateBases.add(String.valueOf(base));
|
||||
}
|
||||
|
||||
public List<String> getAlternateBases() {
|
||||
return alternateBases;
|
||||
}
|
||||
|
||||
public String getFormatString() {
|
||||
return Utils.join(";", formatList);
|
||||
}
|
||||
|
||||
public List<VCFGenotypeRecord> getGenotypesRecords() {
|
||||
return genotypesRecord;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
/**
|
||||
* a helper class, which performs a lot of the safety checks on the parameters
|
||||
* we feed to the VCF (like ensuring the same position for each genotype in a call).
|
||||
*/
|
||||
class VCFParameters {
|
||||
private char referenceBase = '0';
|
||||
private int position = 0;
|
||||
private String contig = null;
|
||||
private boolean initialized = false;
|
||||
private List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
|
||||
private List<String> formatList = new ArrayList<String>();
|
||||
private List<String> alternateBases = new ArrayList<String>();
|
||||
|
||||
public void setLocations(GenomeLoc location, char refBase) {
|
||||
// if we haven't set it up, we initialize the object
|
||||
if (!initialized) {
|
||||
initialized = true;
|
||||
this.contig = location.getContig();
|
||||
this.position = (int) location.getStart();
|
||||
if (location.getStart() != location.getStop()) {
|
||||
throw new IllegalArgumentException("The start and stop locations must be the same");
|
||||
}
|
||||
this.referenceBase = refBase;
|
||||
} else {
|
||||
if (!contig.equals(this.contig))
|
||||
throw new IllegalArgumentException("The contig name has to be the same at a single locus");
|
||||
if (position != this.position)
|
||||
throw new IllegalArgumentException("The position has to be the same at a single locus");
|
||||
if (refBase != this.referenceBase)
|
||||
throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
|
||||
}
|
||||
}
|
||||
|
||||
/** @return get the position */
|
||||
public int getPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
/** @return get the contig name */
|
||||
public String getContig() {
|
||||
return contig;
|
||||
}
|
||||
|
||||
/** @return get the reference base */
|
||||
public char getReferenceBase() {
|
||||
return referenceBase;
|
||||
}
|
||||
|
||||
public void addGenotypeRecord(VCFGenotypeRecord record) {
|
||||
this.genotypesRecord.add(record);
|
||||
}
|
||||
|
||||
public void addFormatItem(String item) {
|
||||
if (!formatList.contains(item))
|
||||
formatList.add(item);
|
||||
}
|
||||
|
||||
public void addAlternateBase(String base) {
|
||||
if (!alternateBases.contains(String.valueOf(base)) && base != String.valueOf(this.getReferenceBase()))
|
||||
alternateBases.add(base);
|
||||
}
|
||||
|
||||
public List<String> getAlternateBases() {
|
||||
return alternateBases;
|
||||
}
|
||||
|
||||
public String getFormatString() {
|
||||
return Utils.join(";", formatList);
|
||||
}
|
||||
|
||||
public List<VCFGenotypeRecord> getGenotypesRecords() {
|
||||
return genotypesRecord;
|
||||
}
|
||||
}
|
||||
|
|
@ -19,7 +19,7 @@ public class VCFRecord {
|
|||
// the alternate bases
|
||||
private final List<String> mAlts = new ArrayList<String>();
|
||||
// our qual value
|
||||
private int mQual;
|
||||
private double mQual;
|
||||
// our filter string
|
||||
private String mFilterString;
|
||||
// our info fields
|
||||
|
|
@ -61,7 +61,7 @@ public class VCFRecord {
|
|||
int position,
|
||||
String ID,
|
||||
List<String> altBases,
|
||||
int qual,
|
||||
double qual,
|
||||
String filters,
|
||||
Map<String, String> infoFields,
|
||||
String genotypeFormatString,
|
||||
|
|
@ -117,7 +117,7 @@ public class VCFRecord {
|
|||
addAlternateBase(alt);
|
||||
break;
|
||||
case QUAL:
|
||||
this.setQual(Integer.valueOf(columnValues.get(val)));
|
||||
this.setQual(Double.valueOf(columnValues.get(val)));
|
||||
break;
|
||||
case FILTER:
|
||||
this.setFilterString(columnValues.get(val));
|
||||
|
|
@ -191,7 +191,7 @@ public class VCFRecord {
|
|||
}
|
||||
|
||||
/** @return the phred-scaled quality score */
|
||||
public int getQual() {
|
||||
public double getQual() {
|
||||
return this.mQual;
|
||||
}
|
||||
|
||||
|
|
@ -273,7 +273,7 @@ public class VCFRecord {
|
|||
this.mID = mID;
|
||||
}
|
||||
|
||||
public void setQual(int mQual) {
|
||||
public void setQual(double mQual) {
|
||||
if (mQual < 0)
|
||||
throw new IllegalArgumentException("Qual values must be greater than 0");
|
||||
this.mQual = mQual;
|
||||
|
|
@ -307,12 +307,12 @@ public class VCFRecord {
|
|||
|
||||
/**
|
||||
* the generation of a string representation, which is used by the VCF writer
|
||||
*
|
||||
* @return a string
|
||||
*/
|
||||
public String toString() {
|
||||
public String toStringRepresentation(VCFHeader header) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
// else builder.append(FIELD_SEPERATOR + record.getValue(field));
|
||||
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
|
||||
builder.append(getChromosome() + FIELD_SEPERATOR);
|
||||
builder.append(getPosition() + FIELD_SEPERATOR);
|
||||
|
|
@ -321,7 +321,7 @@ public class VCFRecord {
|
|||
String alts = "";
|
||||
for (String str : this.getAlternateAlleles()) alts += str + ",";
|
||||
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
|
||||
builder.append(getQual() + FIELD_SEPERATOR);
|
||||
builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
|
||||
String info = "";
|
||||
for (String str : this.getInfoValues().keySet()) {
|
||||
|
|
@ -335,9 +335,30 @@ public class VCFRecord {
|
|||
else builder.append(info);
|
||||
|
||||
if (this.hasGenotypeData()) {
|
||||
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString());
|
||||
for (VCFGenotypeRecord rec : this.getVCFGenotypeRecords()) {
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
addGenotypeData(builder, header);
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* add the genotype data
|
||||
*
|
||||
* @param builder the string builder
|
||||
* @param header the header object
|
||||
*/
|
||||
private void addGenotypeData(StringBuilder builder, VCFHeader header) {
|
||||
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString());
|
||||
if (header.getGenotypeSamples().size() < getVCFGenotypeRecords().size())
|
||||
throw new RuntimeException("We have more genotype samples than the header specified");
|
||||
|
||||
Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getVCFGenotypeRecords());
|
||||
|
||||
for (String genotype : header.getGenotypeSamples()) {
|
||||
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
|
||||
if (gMap.containsKey(genotype)) {
|
||||
VCFGenotypeRecord rec = gMap.get(genotype);
|
||||
if (!rec.toGenotypeString(this.mAlts).equals(""))
|
||||
builder.append(rec.toGenotypeString(this.mAlts));
|
||||
for (String s : rec.getFields().keySet()) {
|
||||
|
|
@ -345,9 +366,14 @@ public class VCFRecord {
|
|||
builder.append(":");
|
||||
builder.append(rec.getFields().get(s));
|
||||
}
|
||||
gMap.remove(genotype);
|
||||
} else {
|
||||
builder.append(".");
|
||||
}
|
||||
}
|
||||
return builder.toString();
|
||||
if (gMap.size() != 0) {
|
||||
throw new RuntimeException("We failed to use all the genotype samples; their must be an incosistancy between the header and records");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -370,4 +396,19 @@ public class VCFRecord {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype mapping from a list and their sample names
|
||||
*
|
||||
* @param list a list of genotype samples
|
||||
*
|
||||
* @return a mapping of the sample name to VCF genotype record
|
||||
*/
|
||||
private static Map<String, VCFGenotypeRecord> genotypeListToMap(List<VCFGenotypeRecord> list) {
|
||||
Map<String, VCFGenotypeRecord> map = new HashMap<String, VCFGenotypeRecord>();
|
||||
for (VCFGenotypeRecord rec : list) {
|
||||
map.put(rec.getSampleName(), rec);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* this class writes VCF files
|
||||
|
|
@ -75,11 +72,7 @@ public class VCFWriter {
|
|||
* @param record the record to output
|
||||
*/
|
||||
public void addRecord(VCFRecord record) {
|
||||
if (record.getColumnCount() != mHeader.getGenotypeSamples().size() + mHeader.getHeaderFields().size()) {
|
||||
throw new RuntimeException("Record has " + record.getColumnCount() +
|
||||
" columns, when is should have " + mHeader.getColumnCount());
|
||||
}
|
||||
String vcfString = record.toString();
|
||||
String vcfString = record.toStringRepresentation(mHeader);
|
||||
try {
|
||||
mWriter.write(vcfString + "\n");
|
||||
} catch (IOException e) {
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ public class RodVCFTest extends BaseTest {
|
|||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");
|
||||
|
||||
private VCFHeader mHeader;
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
|
|
@ -47,13 +47,13 @@ public class RodVCFTest extends BaseTest {
|
|||
|
||||
private RodVCF getVCFObject() {
|
||||
RodVCF vcf = new RodVCF("VCF");
|
||||
VCFHeader header = null;
|
||||
mHeader = null;
|
||||
try {
|
||||
header = (VCFHeader) vcf.initialize(vcfFile);
|
||||
mHeader = (VCFHeader) vcf.initialize(vcfFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
fail("Unable to open VCF file");
|
||||
}
|
||||
header.checkVCFVersion();
|
||||
mHeader.checkVCFVersion();
|
||||
return vcf;
|
||||
}
|
||||
|
||||
|
|
@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest {
|
|||
@Test
|
||||
public void testToString() {
|
||||
// slightly altered line, due to map ordering
|
||||
String firstLine = "20\t14370\trs6054257\tG\tA\t29\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
|
||||
String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
|
||||
RodVCF vcf = getVCFObject();
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
|
|
@ -99,13 +99,13 @@ public class RodVCFTest extends BaseTest {
|
|||
while (iter.hasNext()) {
|
||||
VCFRecord rec1 = reader.next();
|
||||
VCFRecord rec2 = iter.next().mCurrentRecord;
|
||||
if (!rec1.toString().equals(rec2.toString())) {
|
||||
if (!rec1.toStringRepresentation(mHeader).equals(rec2.toStringRepresentation(mHeader))) {
|
||||
fail("VCF record rec1.toString() != rec2.toString()");
|
||||
}
|
||||
// verify the first line too
|
||||
if (first) {
|
||||
if (!firstLine.equals(rec1.toString() + "\n")) {
|
||||
fail("VCF record rec1.toString() != expected string :\n" + rec1.toString() + firstLine);
|
||||
if (!firstLine.equals(rec1.toStringRepresentation(mHeader) + "\n")) {
|
||||
fail("VCF record rec1.toString() != expected string :\n" + rec1.toStringRepresentation(mHeader) + firstLine);
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,63 +8,63 @@ import java.util.Arrays;
|
|||
public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testIntervals() {
|
||||
String[] md5DoC = {"b222d15b300f989dd2a86ff1f500f64b", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||
String[] md5DoC = {"c0a7e2fc07d565e633b3064f9f3cdaf5", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5DoC));
|
||||
executeTest("testDoCFilter", spec1);
|
||||
|
||||
String[] md5AlleleBalance = {"9a59d33b55e5bad0228f2d2d67d4c17d", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||
String[] md5AlleleBalance = {"aa0f7800cfd346236620ae0eac220817", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5AlleleBalance));
|
||||
executeTest("testAlleleBalanceFilter", spec2);
|
||||
|
||||
String[] md5Strand = {"b0a6fb821be2f7b26f8f6d77cbd758a9", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||
String[] md5Strand = {"9f430f251dbeb58a2f80a1306a5dd492", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Strand));
|
||||
executeTest("testStrandFilter", spec3);
|
||||
|
||||
String[] md5Lod = {"60624843c4c8ae561acc444df565da99", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||
String[] md5Lod = {"56177258c0b3944c043f86faee4b42ae", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||
WalkerTestSpec spec4 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Lod));
|
||||
executeTest("testLodFilter", spec4);
|
||||
|
||||
String[] md5MQ0 = {"5e3d4d6b13e79a5df5171d3e5a9f1bd7", "3203de335621851bccf596242b079e23"};
|
||||
String[] md5MQ0 = {"0e303c32f5c1503f4c875771f28fc46c", "3203de335621851bccf596242b079e23"};
|
||||
WalkerTestSpec spec5 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ0));
|
||||
executeTest("testMappingQuality0Filter", spec5);
|
||||
|
||||
String[] md5MQ = {"fdbac9cf332dd45d9c92146157ace65f", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||
String[] md5MQ = {"946462a6199e9453784e0942e18e6830", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||
WalkerTestSpec spec6 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ));
|
||||
executeTest("testRMSMappingQualityFilter", spec6);
|
||||
|
||||
String[] md5OnOff = {"57c5a92bde03adbff9c6ca6eada033c4", "67f2e1bc025833b0fa31f47195198997"};
|
||||
String[] md5OnOff = {"2ff84e104ce73e347e55d272170b4d03", "67f2e1bc025833b0fa31f47195198997"};
|
||||
WalkerTestSpec spec7 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5OnOff));
|
||||
executeTest("testOnOffGenotypeFilter", spec7);
|
||||
|
||||
String[] md5Clusters = {"44223fa50dac2d9c1096558689cb8493", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||
String[] md5Clusters = {"e6a1c088678b1c31ff340ebd622b476e", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||
WalkerTestSpec spec8 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Clusters));
|
||||
executeTest("testClusteredSnpsFilter", spec8);
|
||||
|
||||
String[] md5Indels = {"0f03727ac9e6fc43311377b29d12596c", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||
String[] md5Indels = {"82e555b76c12474154f8e5e402516d73", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||
WalkerTestSpec spec9 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("d1882fd8ecee6a95f561ed3be4d4a435");
|
||||
md5.add("0b96a8046d2a06bd87f57df8bac1678d");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
|
|
@ -50,7 +50,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenotypesToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("debeaf31846328eddc0abf226fc72ac0");
|
||||
md5.add("09660faa7cfad8af36602f79461c0605");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
|
|
|
|||
Loading…
Reference in New Issue