Finalizing BCF2 mark III commit

-- Moved GENOTYPE_KEY vcf header line to VCFConstants.  This general migration and cleanup is on Eric's plate now
-- Updated HC to initialize the annotation engine in an order that allows it to write a proper VCF header.  Still doesn't work...
-- Updating integration test files.  Moved many more files into public/testdata.  Updated their headers to all work correctly with new strict VCF header checking.
-- Bugfix for TandemRepeatAnnotation that must be unbounded not A count type as it provides info for the REF as well as each alt
-- No longer add FALSE values to flag values in VCs in VariantAnnotatorEngine.  DB = 0 is never seen in the output VCFs now
-- Fixed bug in VCFDiffableReader that didn't differentiate between "." and "PASS" VC filter status
-- Unconditionally add lowQual Filter to UG output VCF files as this is in some cases (EMIT_ALL_SITES) used when the previous check said it wouldn't be
-- VariantsToVCF now properly writes out the GT FORMAT field
-- BCF2 codec explodes when reading symbolic alleles as I literally cannot figure out how to use the allele clipping code.  Eric said he and Ami will clean up this whole piece of infrastructure
-- Fixed bug in BCF2Codec that wasn't setting the phase field correctly.  UnitTested now
-- PASS string now added at the end of the BCF2 dictionary after discussion with Heng
-- Fixed bug where I was writing out all field values as BigEndian.  Now everything is LittleEndian.
-- VCFHeader detects the case where a count field has size < 0 (some of our files have count = -1) and throws a UserException
-- Cleaned up unused code
-- Fixed bug in BCF2 string encoder that wasn't handling the case of an empty list of strings for encoding
-- Fixed bug where all samples are no-called in a VC, in which case we (like the VCFWriter) write out no-called diploid genotypes for all samples
-- We always write the number of genotype samples into the BCF2 nSamples header.  How we can have a variable number of samples per record isn't clear to me, as we don't have a map from missing samples to header names...
-- Removed old filtersWereAppliedToContext code in VCF as properly handle unfiltered, filtered, and PASS records internally
-- Fastpath function getDisplayBases() in allele that just gives you the raw bytes[] you'd see for an Allele
-- Genotype fields no longer differentiate between unfiltered, filtered, and PASS values.  Genotype objects are all PASS implicitly, or explicitly filtered.  We only write out the FT values if at least one sample is filtered.  Removed interface functions and cleaned up code
-- Refactored padAllele code from createVariantContextWithPaddedAlleles into the function padAllele so that it actually works.  In general, **** NEVER COPY CODE **** if you need to share functionality make a function, that's why they were invented!
-- Increased the default number of records to read for DiffObjects to 1M
This commit is contained in:
Mark DePristo 2012-06-19 09:46:26 -04:00
parent 0c8b830db7
commit fba7dafa0e
41 changed files with 317 additions and 267 deletions

View File

@ -71,7 +71,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
public static final VCFInfoHeaderLine[] descriptions = {
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
public List<String> getKeyNames() {
return Arrays.asList(keyNames);

View File

@ -217,11 +217,11 @@ public class VariantAnnotatorEngine {
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
// put the DB key into the INFO field
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
// add the ID if appropriate
if ( rsID != null ) {
// put the DB key into the INFO field
infoAnnotations.put(VCFConstants.DBSNP_KEY, true);
if ( vc.emptyID() ) {
vc = new VariantContextBuilder(vc).id(rsID).make();
} else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
@ -237,7 +237,8 @@ public class VariantAnnotatorEngine {
break;
}
}
infoAnnotations.put(dbSet.getValue(), overlapsComp);
if ( overlapsComp )
infoAnnotations.put(dbSet.getValue(), overlapsComp);
}
}

View File

@ -97,7 +97,9 @@ public class VCFDiffableReader implements DiffableReader {
vcRoot.add("REF", vc.getReference());
vcRoot.add("ALT", vc.getAlternateAlleles());
vcRoot.add("QUAL", vc.hasLog10PError() ? vc.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4);
vcRoot.add("FILTER", vc.getFilters());
vcRoot.add("FILTER", ! vc.filtersWereApplied() // needs null to differentiate between PASS and .
? VCFConstants.MISSING_VALUE_v4
: ( vc.getFilters().isEmpty() ? VCFConstants.PASSES_FILTERS_v4 : vc.getFilters()) );
// add info fields
for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {

View File

@ -272,9 +272,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// FORMAT fields
headerInfo.addAll(getSupportedHeaderStrings());
// FILTER fields
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
return headerInfo;
}
@ -285,7 +285,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
*/
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE);
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));

View File

@ -316,7 +316,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
// first, the basic info
headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
headerInfo.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE);
// FORMAT and INFO fields
// headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());

View File

@ -204,16 +204,16 @@ public class VariantEvalUtils {
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
final boolean isSingleton = originalAlleleCount == newAlleleCount && newAlleleCount == 1;
final boolean hasChrCountAnnotations = vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);
final boolean hasChrCountAnnotations = vcsub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
vcsub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
vcsub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);
if ( ! isSingleton && hasChrCountAnnotations ) {
// nothing to update
return vc;
return vcsub;
} else {
// have to do the work
VariantContextBuilder builder = new VariantContextBuilder(vc);
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
if ( isSingleton )
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);

View File

@ -129,7 +129,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
.attribute("OriginalStart", fromInterval.getStart()).make();
}
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),

View File

@ -222,6 +222,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getID()));
hInfo.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE);
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
for ( VCFHeaderLine field : hInfo ) {
if ( field instanceof VCFFormatHeaderLine) {

View File

@ -36,6 +36,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -334,7 +335,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
*/
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
final ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
return clippedAlleles;
} else
@ -355,14 +356,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
String ref = null;
for ( int i = 0; i < nAlleles; i++ ) {
final String allele = (String)decoder.decodeTypedValue();
final String alleleBases = (String)decoder.decodeTypedValue();
if ( i == 0 ) {
ref = allele;
alleles.add(Allele.create(allele, true));
} else {
alleles.add(Allele.create(allele, false));
}
final boolean isRef = i == 0;
final Allele allele = Allele.create(alleleBases, isRef);
if ( isRef ) ref = alleleBases;
alleles.add(allele);
if ( allele.isSymbolic() )
throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
}
assert ref != null;

View File

@ -169,6 +169,9 @@ public class BCF2GenotypeFieldDecoders {
gb.alleles(gt);
}
final boolean phased = (a1 & 0x01) == 1;
gb.phased(phased);
}
}
@ -199,6 +202,8 @@ public class BCF2GenotypeFieldDecoders {
gt.add(getAlleleFromEncoded(siteAlleles, encode));
gb.alleles(gt);
final boolean phased = (encoded[0] & 0x01) == 1;
gb.phased(phased);
}
}
}

View File

@ -82,18 +82,27 @@ public final class BCF2Utils {
@Requires("header != null")
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
final Set<String> dict = new TreeSet<String>();
final Set<String> seen = new HashSet<String>();
final ArrayList<String> dict = new ArrayList<String>();
boolean sawPASS = false;
// set up the strings dictionary
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
for ( VCFHeaderLine line : header.getMetaData() ) {
if ( line instanceof VCFIDHeaderLine) {
VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
dict.add(idLine.getID());
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
if ( ! seen.contains(idLine.getID())) {
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
dict.add(idLine.getID());
seen.add(idLine.getID());
}
}
}
return new ArrayList<String>(dict);
if ( ! sawPASS )
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
return dict;
}
@Requires({"nElements >= 0", "type != null"})
@ -142,25 +151,6 @@ public final class BCF2Utils {
}
}
@Requires({"stream != null", "bytesForEachInt > 0"})
public final static int readInt(int bytesForEachInt, final InputStream stream) {
switch ( bytesForEachInt ) {
case 1: {
return (byte)(readByte(stream));
} case 2: {
final int b1 = readByte(stream) & 0xFF;
final int b2 = readByte(stream) & 0xFF;
return (short)((b1 << 8) | b2);
} case 4: {
final int b1 = readByte(stream) & 0xFF;
final int b2 = readByte(stream) & 0xFF;
final int b3 = readByte(stream) & 0xFF;
final int b4 = readByte(stream) & 0xFF;
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
} default: throw new ReviewedStingException("Unexpected size during decoding");
}
}
/**
* Collapse multiple strings into a comma separated list
*
@ -299,20 +289,40 @@ public final class BCF2Utils {
else return Collections.singletonList(o);
}
@Requires({"stream != null", "bytesForEachInt > 0"})
public final static int readInt(int bytesForEachInt, final InputStream stream) {
switch ( bytesForEachInt ) {
case 1: {
return (byte)(readByte(stream));
} case 2: {
final int b2 = readByte(stream) & 0xFF;
final int b1 = readByte(stream) & 0xFF;
return (short)((b1 << 8) | b2);
} case 4: {
final int b4 = readByte(stream) & 0xFF;
final int b3 = readByte(stream) & 0xFF;
final int b2 = readByte(stream) & 0xFF;
final int b1 = readByte(stream) & 0xFF;
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
} default: throw new ReviewedStingException("Unexpected size during decoding");
}
}
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
switch ( type.getSizeInBytes() ) {
case 1:
encodeStream.write(0xFF & value);
break;
case 2:
encodeStream.write((0x00FF & value));
encodeStream.write((0xFF00 & value) >> 8);
encodeStream.write(0xFF & value);
break;
case 4:
encodeStream.write((0xFF000000 & value) >> 24);
encodeStream.write((0x00FF0000 & value) >> 16);
encodeStream.write((0x0000FF00 & value) >> 8);
encodeStream.write((0x000000FF & value));
encodeStream.write((0x0000FF00 & value) >> 8);
encodeStream.write((0x00FF0000 & value) >> 16);
encodeStream.write((0xFF000000 & value) >> 24);
break;
default:
throw new ReviewedStingException("BUG: unexpected type size " + type);

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays;
import java.util.LinkedHashMap;
@ -154,6 +155,10 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
count = Integer.valueOf(numberStr);
}
if ( count < 0 && countType == VCFHeaderLineCount.INTEGER )
throw new UserException.MalformedVCFHeader("Count < 0 for fixed size VCF header field " + name);
try {
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
} catch (Exception e) {

View File

@ -117,4 +117,9 @@ public final class VCFConstants {
public static final int MAX_GENOTYPE_QUAL = 99;
public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare
//
// VCF header line constants
//
public static final VCFFormatHeaderLine GENOTYPE_KEY_HEADER_LINE = new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype");
}

View File

@ -347,6 +347,15 @@ public class Allele implements Comparable<Allele> {
*/
public String getDisplayString() { return new String(bases); }
/**
* Same as #getDisplayString() but returns the result as byte[].
*
* Slightly faster then getDisplayString()
*
* @return the allele string representation
*/
public byte[] getDisplayBases() { return bases; }
/**
* @param other the other allele
*

View File

@ -156,11 +156,6 @@ public final class FastGenotype extends Genotype {
return (List<String>) getExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
}
@Override
public boolean filtersWereApplied() {
return hasExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY);
}
@Override public int[] getPL() {
return PL;
}

View File

@ -451,7 +451,7 @@ public abstract class Genotype implements Comparable<Genotype> {
*
* @return
*/
@Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"})
@Ensures({"result != null"})
public abstract List<String> getFilters();
@Ensures({"result != getFilters().isEmpty()"})
@ -459,9 +459,6 @@ public abstract class Genotype implements Comparable<Genotype> {
return ! getFilters().isEmpty();
}
@Ensures("result == true || getFilters().isEmpty()")
public abstract boolean filtersWereApplied();
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
@Deprecated public double getLog10PError() { return getGQ() / -10.0; }
@Deprecated public int getPhredScaledQual() { return getGQ(); }

View File

@ -383,7 +383,8 @@ public final class GenotypeBuilder {
*/
@Requires("filters != null")
public GenotypeBuilder filters(final List<String> filters) {
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
if ( ! filters.isEmpty() )
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
return this;
}

View File

@ -113,7 +113,6 @@ public class SlowGenotype extends Genotype {
//
// ---------------------------------------------------------------------------------------------------------
@Override public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
@Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }

View File

@ -339,7 +339,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @return
*/
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) {
if ( sampleNames.containsAll(getSampleNames()) ) {
return this; // fast path when you don't have any work to do
} else {
VariantContextBuilder builder = new VariantContextBuilder(this);
@ -559,7 +559,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
public String getAlleleStringWithRefPadding(final Allele allele) {
if ( VariantContextUtils.needsPadding(this) )
return VariantContextUtils.padAllele(this, allele);
return VariantContextUtils.padAllele(this, allele).getDisplayString();
else
return allele.getDisplayString();
}
@ -1177,8 +1177,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
// if ( getType() == Type.INDEL ) {
// if ( getReference().length() != (getLocation().size()-1) ) {
long length = (stop - start) + 1;
if ( (getReference().isNull() && length != 1 ) ||
(!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) {
if ( ! isSymbolic()
&& ((getReference().isNull() && length != 1 )
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
}
@ -1358,19 +1359,38 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header) {
builder.attributes(fullyDecodeAttributes(getAttributes(), header));
builder.attributes(fullyDecodeAttributes(getAttributes(), header, false));
}
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes, final VCFHeader header) {
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes,
final VCFHeader header,
final boolean allowMissingValuesComparedToHeader) {
final Map<String, Object> newAttributes = new HashMap<String, Object>(attributes.size());
for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) {
final String field = attr.getKey();
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) )
continue; // gross, FT is part of the extended attributes
final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field);
final Object decoded = decodeValue(field, attr.getValue(), format);
if ( decoded != null )
if ( decoded != null ) {
if ( ! allowMissingValuesComparedToHeader
&& format.getCountType() != VCFHeaderLineCount.UNBOUNDED
&& format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements
final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1;
final int expSize = format.getCount(this.getNAlleles() - 1);
if ( obsSize != expSize ) {
throw new UserException.MalformedVCFHeader("Discordant field size detected for field " +
field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " +
"but the header says this should have " + expSize + " values based on header record " +
format);
}
}
newAttributes.put(field, decoded);
}
}
return newAttributes;
@ -1400,6 +1420,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
} else {
return value;
}
// allowMissingValuesComparedToHeader
}
private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
@ -1409,7 +1431,12 @@ public class VariantContext implements Feature { // to enable tribble integratio
else {
switch ( format.getType() ) {
case Character: return string;
case Flag: return Boolean.valueOf(string);
case Flag:
final boolean b = Boolean.valueOf(string);
if ( b == false )
throw new UserException.MalformedVCF("VariantContext FLAG fields " + field + " cannot contain false values"
+ " as seen at " + getChr() + ":" + getStart());
return b;
case String: return string;
case Integer: return Integer.valueOf(string);
case Float: return Double.valueOf(string);
@ -1430,7 +1457,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) {
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header);
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header, true);
return new GenotypeBuilder(g).attributes(map).make();
}

View File

@ -182,17 +182,23 @@ public class VariantContextUtils {
return false;
}
public static String padAllele(final VariantContext vc, final Allele allele) {
public static Allele padAllele(final VariantContext vc, final Allele allele) {
assert needsPadding(vc);
StringBuilder sb = new StringBuilder();
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
sb.append(allele.getDisplayString());
return sb.toString();
if ( allele.isSymbolic() )
return allele;
else {
// get bases for current allele and create a new one with trimmed bases
final StringBuilder sb = new StringBuilder();
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
sb.append(allele.getDisplayString());
final String newBases = sb.toString();
return Allele.create(newBases, allele.isReference());
}
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
final boolean padVC = needsPadding(inputVC);
// nothing to do if we don't need to pad bases
@ -200,46 +206,21 @@ public class VariantContextUtils {
if ( !inputVC.hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
Byte refByte = inputVC.getReferenceBaseForIndel();
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
final Map<Allele, Allele> unpaddedToPadded = new HashMap<Allele, Allele>(inputVC.getNAlleles());
List<Allele> alleles = new ArrayList<Allele>();
for (Allele a : inputVC.getAlleles()) {
// get bases for current allele and create a new one with trimmed bases
if (a.isSymbolic()) {
alleles.add(a);
} else {
String newBases;
if ( refBaseShouldBeAppliedToEndOfAlleles )
newBases = a.getBaseString() + new String(new byte[]{refByte});
else
newBases = new String(new byte[]{refByte}) + a.getBaseString();
alleles.add(Allele.create(newBases,a.isReference()));
}
for (final Allele a : inputVC.getAlleles()) {
final Allele padded = padAllele(inputVC, a);
alleles.add(padded);
unpaddedToPadded.put(a, padded);
}
// now we can recreate new genotypes with trimmed alleles
GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
for (final Genotype g : inputVC.getGenotypes() ) {
List<Allele> inAlleles = g.getAlleles();
List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
for (Allele a : inAlleles) {
if (a.isCalled()) {
if (a.isSymbolic()) {
newGenotypeAlleles.add(a);
} else {
String newBases;
if ( refBaseShouldBeAppliedToEndOfAlleles )
newBases = a.getBaseString() + new String(new byte[]{refByte});
else
newBases = new String(new byte[]{refByte}) + a.getBaseString();
newGenotypeAlleles.add(Allele.create(newBases,a.isReference()));
}
}
else {
// add no-call allele
newGenotypeAlleles.add(Allele.NO_CALL);
}
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
for (final Allele a : g.getAlleles()) {
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
@ -556,7 +537,7 @@ public class VariantContextUtils {
for (final VariantContext vc : prepaddedVCs) {
// also a reasonable place to remove filtered calls, if needed
if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(createVariantContextWithPaddedAlleles(vc, false));
VCs.add(createVariantContextWithPaddedAlleles(vc));
}
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null;

View File

@ -269,21 +269,6 @@ public abstract class BCF2FieldEncoder {
//
// ----------------------------------------------------------------------
/**
* Convenience method that just called encodeValue with a no minimum for the number of values.
*
* Primarily useful for encoding site values
*
* @param encoder
* @param value
* @param type
* @throws IOException
*/
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
encodeValue(encoder, value, type, 0);
}
/**
* Key abstract method that should encode a value of the given type into the encoder.
*
@ -348,10 +333,10 @@ public abstract class BCF2FieldEncoder {
if ( value == null )
return "";
else if (value instanceof List) {
if ( ((List) value).size() == 1 )
return (String)((List) value).get(0);
else
return BCF2Utils.collapseStringList((List<String>)value);
final List<String> l = (List<String>)value;
if ( l.isEmpty() ) return "";
else if ( l.size() == 1 ) return (String)l.get(0);
else return BCF2Utils.collapseStringList(l);
} else
return (String)value;
}
@ -376,7 +361,7 @@ public abstract class BCF2FieldEncoder {
}
@Override
@Requires("minValues <= 1")
@Requires({"minValues <= 1", "value != null", "value instanceof Boolean", "((Boolean)value) == true"})
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
encoder.encodeRawBytes(1, getStaticType());
}

View File

@ -100,7 +100,7 @@ public abstract class BCF2FieldWriter {
} else {
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
encoder.encodeType(valueCount, type);
getFieldEncoder().encodeOneValue(encoder, rawValue, type);
getFieldEncoder().encodeValue(encoder, rawValue, type, valueCount);
}
}
}
@ -246,6 +246,10 @@ public abstract class BCF2FieldWriter {
buildAlleleMap(vc);
nValuesPerGenotype = vc.getMaxPloidy();
// deal with the case where we have no call everywhere, in which case we write out diploid
if ( nValuesPerGenotype == -1 )
nValuesPerGenotype = 2;
super.start(encoder, vc);
}
@ -298,7 +302,6 @@ public abstract class BCF2FieldWriter {
if ( nAlleles > 2 ) {
// for multi-allelics we need to clear the map, and add additional looks
alleleMapForTriPlus.clear();
alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup
final List<Allele> alleles = vc.getAlleles();
for ( int i = 2; i < alleles.size(); i++ ) {
alleleMapForTriPlus.put(alleles.get(i), i);

View File

@ -84,6 +84,7 @@ import java.util.*;
*/
class BCF2Writer extends IndexingVariantContextWriter {
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
final private static List<Allele> MISSING_GENOTYPE = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
private VCFHeader header;
@ -213,7 +214,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
final int nAlleles = vc.getNAlleles();
final int nInfo = vc.getAttributes().size();
final int nGenotypeFormatFields = getNGenotypeFormatFields(vc);
final int nSamples = vc.getNSamples();
final int nSamples = header.getNGenotypeSamples();
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32);
encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32);
@ -256,10 +257,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildAlleles( VariantContext vc ) throws IOException {
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
for ( final Allele allele : vc.getAlleles() ) {
byte[] s = allele.getBases();
for ( Allele allele : vc.getAlleles() ) {
if ( needsPadding )
s = VariantContextUtils.padAllele(vc,allele).getBytes();
allele = VariantContextUtils.padAllele(vc,allele);
final byte[] s = allele.getDisplayBases();
encoder.encodeTypedString(s);
}
}
@ -298,7 +299,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
Genotype g = vc.getGenotype(name);
if ( g == null )
// we don't have any data about g at all
g = new GenotypeBuilder(name).make();
g = new GenotypeBuilder(name).alleles(MISSING_GENOTYPE).make();
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);

View File

@ -51,9 +51,6 @@ class VCFWriter extends IndexingVariantContextWriter {
// the VCF header we're storing
protected VCFHeader mHeader = null;
// were filters applied?
protected boolean filtersWereAppliedToContext = false;
final private boolean allowMissingFieldsInHeader;
private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
@ -78,13 +75,6 @@ class VCFWriter extends IndexingVariantContextWriter {
// note we need to update the mHeader object after this call because they header
// may have genotypes trimmed out of it, if doNotWriteGenotypes is true
mHeader = writeHeader(header, mWriter, doNotWriteGenotypes, getVersionLine(), getStreamName());
// determine if we use filters, so we should FORCE pass the records
// TODO -- this might not be necessary any longer as we have unfiltered, filtered, and PASS VCs
for ( final VCFHeaderLine line : header.getMetaData() ) {
if ( line instanceof VCFFilterHeaderLine)
filtersWereAppliedToContext = true;
}
}
public static final String getVersionLine() {
@ -171,7 +161,7 @@ class VCFWriter extends IndexingVariantContextWriter {
vc = new VariantContextBuilder(vc).noGenotypes().make();
try {
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
super.add(vc);
Map<Allele, String> alleleMap = buildAlleleMap(vc);
@ -219,7 +209,7 @@ class VCFWriter extends IndexingVariantContextWriter {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// FILTER
String filters = getFilterString(vc, filtersWereAppliedToContext);
String filters = getFilterString(vc);
mWriter.write(filters);
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -283,7 +273,7 @@ class VCFWriter extends IndexingVariantContextWriter {
//
// --------------------------------------------------------------------------------
private final String getFilterString(final VariantContext vc, boolean forcePASS) {
private final String getFilterString(final VariantContext vc) {
if ( vc.isFiltered() ) {
for ( final String filter : vc.getFilters() )
if ( ! mHeader.hasFilterLine(filter) )
@ -291,7 +281,7 @@ class VCFWriter extends IndexingVariantContextWriter {
return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()));
}
else if ( forcePASS || vc.filtersWereApplied() )
else if ( vc.filtersWereApplied() )
return VCFConstants.PASSES_FILTERS_v4;
else
return VCFConstants.UNFILTERED;
@ -407,7 +397,7 @@ class VCFWriter extends IndexingVariantContextWriter {
// some exceptions
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
}
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);

View File

@ -48,7 +48,7 @@ public class MD5DB {
/**
* Subdirectory under the ant build directory where we store integration test md5 results
*/
private static final int MAX_RECORDS_TO_READ = 100000;
private static final int MAX_RECORDS_TO_READ = 1000000;
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = -1;
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";

View File

@ -43,7 +43,7 @@ public class ReferenceOrderedQueryDataPoolUnitTest extends BaseTest{
@Test
public void testCloseFilePointers() throws IOException {
// Build up query parameters
File file = new File(BaseTest.validationDataLocation + "NA12878.hg19.example1.vcf");
File file = new File(BaseTest.testDir + "NA12878.hg19.example1.vcf");
RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags());
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
GenomeLocParser parser = new GenomeLocParser(seq);

View File

@ -53,7 +53,7 @@ import java.util.*;
*/
public class FeatureManagerUnitTest extends BaseTest {
private static final File RANDOM_FILE = new File(testDir + "exampleGATKReport.eval");
private static final File VCF3_FILE = new File(testDir + "vcfexample3.vcf");
private static final File VCF3_FILE = new File(testDir + "vcf3.vcf");
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");

View File

@ -44,7 +44,7 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
final String chr = "20";
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
GenomeLocParser parser = new GenomeLocParser(seq);
File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf");
File file = new File(testDir + "NA12878.hg19.example1.vcf");
VCFCodec codec = new VCFCodec();
TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec);
CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query(chr, 1, 100000);

View File

@ -19,7 +19,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
}
@Test
@Test(enabled = false)
public void test1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
@ -28,7 +28,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
executeTest("Test symbolic alleles", spec);
}
@Test
@Test(enabled = false)
public void test2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),

View File

@ -15,15 +15,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
baseTestString() + " --variant " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("bd6848e7dbf2f809ee2f690ee2cf8ef4"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@Test
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
baseTestString() + " --variant " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("9914bd19f6235c550e5182e0f4591da6"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@ -31,15 +31,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6a52ef10bb10d72cdd82a8f7afc2dd09"));
baseTestString() + " -G Standard --variant " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9084e6c7b1cec0f3a2c6d96711844d5e"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@Test
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("74d894fd31b449deffca88d0e465f01b"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -47,8 +47,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("dd89dfa22f0e1d6760095e04f528d62a"));
baseTestString() + " --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b85c1ea28194484b327fbe0add1b5685"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -57,32 +57,32 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
// the genotype annotations in this file are actually out of order. If you don't parse the genotypes
// they don't get reordered. It's a good test of the genotype ordering system.
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("542d9ed8290ef7868387af4127e0b5fa"));
baseTestString() + " --variant " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("fe4d4e2484c4cf8b1cd50ad42cfe468e"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@Test
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b1b32ed3b831c92c94258c8e4a60e8c9"));
baseTestString() + " -G Standard --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("043fc6205b0633edcd3fadc9e044800c"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@Test
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("a25eacb0ceea2c082af349f8d7776c8a"));
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("6fafb42d374a67ba4687a23078a126af"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@Test
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("ef046909a6f6c6cb43653a255a99a014"));
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("639462a0e0fa79e33def5f011fe55961"));
executeTest("test exclude annotations", spec);
}
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + testDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("5c2fded3b6a96b0b0788086bbb2409ed"));
Arrays.asList("ebbf32f5b8b8d22f2eb247a0a3db3da0"));
executeTest("test overwriting header", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("c590088d85edce786604fd600f5d5e75"));
Arrays.asList("afe6c9d3b4b80635a541cdfcfa48db2f"));
executeTest("not passing it any reads", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("ade9354a4cdd6cc92c169f252fb36f3f"));
Arrays.asList("21d696ea8c55d2fd4cbb4dcd5f7f7db6"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3withIDs.vcf -L " + testDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("f496f40e1e9efa743e3b473f6fe6e6d3"));
Arrays.asList("ef95394c14d5c16682a322f3dfb9000c"));
executeTest("adding multiple IDs with dbSNP", spec);
}
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + testDir + "fakeHM3.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("d383fbd741d604625c9507d4da1c5a27"));
Arrays.asList("e6e276b7d517d57626c8409589cd286f"));
executeTest("getting DB tag with HM3", spec);
}
@ -130,23 +130,23 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoQuals() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant " + testDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + testDir + "noQual.vcf -A QualByDepth", 1,
Arrays.asList("4a247f039dfb16ac05b38a0dd5f98da6"));
Arrays.asList("a99e8315571ed1b6bce942451b3d8612"));
executeTest("test file doesn't have QUALs", spec);
}
@Test
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("067792efcffea93ade632e52a80d0d8f"));
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("7d6ea3b54210620cbc7e14dad8836bcb"));
executeTest("using expression", spec);
}
@Test
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("66c68deb0508348324eb47d524e756de"));
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1,
Arrays.asList("35ce4fb0288dfc5c01ec6ce8b14c6157"));
executeTest("using expression with ID", spec);
}

View File

@ -41,7 +41,8 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
"-o %s --no_cmdline_in_header", 1, Arrays.asList("cdbf8cc557f5be9ac778e52338c0d906"));
"-o %s --no_cmdline_in_header --allowMissingVCFHeaders", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea"));
spec.disableShadowBCF();
executeTest("test BeagleOutputToVCF", spec);
}
@ -50,7 +51,8 @@ public class BeagleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ProduceBeagleInput -R " + hg19Reference + " " +
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
"-o %s", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
"-o %s --allowMissingVCFHeaders", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
spec.disableShadowBCF();
executeTest("test BeagleInput", spec);
}
@ -59,8 +61,9 @@ public class BeagleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
"--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 --allowMissingVCFHeaders -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","4b6417f892ccfe5c63b8a60cb0ef3740"));
spec.disableShadowBCF();
executeTest("test BeagleInputWithBootstrap",spec);
}
@ -72,8 +75,8 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
"-L 20:1-70000 -o %s --no_cmdline_in_header --allowMissingVCFHeaders",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
spec.disableShadowBCF();
executeTest("testBeagleChangesSitesToRef",spec);
}

View File

@ -15,80 +15,80 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
@Test
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
baseTestString() + " --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("49471b44ac165929d3ff81f98ce19063"));
executeTest("test no action", spec);
}
@Test
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4a4596929f9fe983d8868ca142567781"));
baseTestString() + " -window 10 --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("8b45895d7ae1f36b70e7fd26aa9451d3"));
executeTest("test clustered SNPs", spec);
}
@Test
public void testMask1() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -maskName foo --mask:VCF3 " + testDir + "vcfexample2.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("1719462cd17986c33e59e45b69df0270"));
baseTestString() + " -maskName foo --mask " + testDir + "vcfexample2.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("06307029f5da87ae4edd9804063a98f9"));
executeTest("test mask all", spec1);
}
@Test
public void testMask2() {
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("db19ff7d90c82cda09fb3c3878100eb5"));
baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("1fd06f6b2642685093ed36342f002b58"));
executeTest("test mask some", spec2);
}
@Test
public void testMask3() {
WalkerTestSpec spec3 = new WalkerTestSpec(
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a9e417cba21585c786d4b9930265ea31"));
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("d8c5206d5d13477a5929fb1ae5a6bfc4"));
executeTest("test mask extend", spec3);
}
@Test
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4160904b180d1f62a6bf50de6728ce00"));
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a3be095e8aa75d9ef4235b9487527307"));
executeTest("test filter #1", spec);
}
@Test
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("df80db30c7836731ac7c8c3d4fc005b4"));
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("bd1361ddc52d73b8cd7adeb9e5c47200"));
executeTest("test filter #2", spec);
}
@Test
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("71ce6c0952831cb68f575aa0173dce2b"));
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4a43ec0285433df426ab482f88cf7ca6"));
executeTest("test filter with separate names #2", spec);
}
@Test
public void testGenotypeFilters1() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("179f7f2a90c0e6c656109aac9b775476"));
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("5ee4485a022e163645c08b9691384f67"));
executeTest("test genotype filter #1", spec1);
}
@Test
public void testGenotypeFilters2() {
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("22e07c27feb9017a130dfb045c5b29b9"));
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("d0a068c8cfb0758d2a8d471383f39b68"));
executeTest("test genotype filter #2", spec2);
}
@ -96,7 +96,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testDeletions() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + testDir + "twoDeletions.vcf", 1,
Arrays.asList("637256ee5348c1c57f1dadf581b06ed9"));
Arrays.asList("a1c02a5a90f1262e9eb3d2cad1fd08f2"));
executeTest("test deletions", spec);
}
}

View File

@ -8,6 +8,7 @@ import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
// ********************************************************************************** //
// Note that this class also serves as an integration test for the VariantAnnotator! //
@ -28,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("1c6ea045819b151bcd9d98947c5d4c4d"));
Arrays.asList("a4c520b56f85513423c1c0204cabb5e1"));
executeTest("test MultiSample Pilot1", spec);
}
@ -36,7 +37,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("f9f2912c63e3253495702099bde5de0f"));
Arrays.asList("26ec9db9c7ad4b9a2ef25a8b1cb0d45c"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -44,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("c51d037e0b1cd0ed3a1cd6c6b29646cf"));
Arrays.asList("f2624782525929384d9f2c59f3c65529"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -52,7 +53,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("bd7d25f6c6142837e3bc4c0d5dced2ed"));
Arrays.asList("a71d4abbad9c31e66aeb21b1fe2cfe9a"));
executeTest("test SingleSample Pilot2", spec);
}
@ -60,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1,
Arrays.asList("dba580e8b5e96a28d673b437b4da1c70"));
Arrays.asList("2429c0f24da57ab1a1313e807e53e48e"));
executeTest("test Multiple SNP alleles", spec);
}
@ -68,7 +69,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("174905e2547e94c3eee07ce84497692b"));
Arrays.asList("995c8f57d1f211e004ce81d356a80d16"));
executeTest("test bad read", spec);
}
@ -76,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("29b15e2017b13e6cb3ad56cc74c719e7"));
Arrays.asList("53f60fe15ebffdf85183426b93d48b10"));
executeTest("test reverse trim", spec);
}
@ -86,7 +87,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "6f14394e90fdacd29390a1f3521f5ca8";
private final static String COMPRESSED_OUTPUT_MD5 = "65846f5a8591d591ffbd1f85afadd9d5";
@Test
public void testCompressedOutput() {
@ -107,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "7824468b8290ffb7795a1ec3e493c1a4";
String md5 = "36e6c8b0f30b159915eedaa5926ebbad";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -139,7 +140,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("86121f5094f26c8b2e320c1f5dea4ae3"));
Arrays.asList("efc4882c1150b246be163e08d81f428f"));
executeTest("test min_base_quality_score 26", spec);
}
@ -147,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("3712dd35b0e630977c8c5226ccc532ae"));
Arrays.asList("b47b08b514acf5e96fb4994754e0e9ce"));
executeTest("test SLOD", spec);
}
@ -155,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("8a5bb0cca3004848dbca9c08fc2afed9"));
Arrays.asList("08db1413ed6a04fcb03d58e3ece9f366"));
executeTest("test NDA", spec);
}
@ -163,23 +164,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("9863ecd2576c7a962f9d05a7dc670169"));
Arrays.asList("d78f95c225db2a4b21c99a688330df52"));
executeTest("test using comp track", spec);
}
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "fe204cef499e5aceb2732ba2e45903ad");
testOutputParameters("-sites_only", "1e4a98213ec00479cc090f53620317e4");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1ab8b68891d1531923a40d594250e8e0");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "5580e3e7c2b358ed416bc03409c54c1d");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "ab179ef6ece3ab9e6b1ff5800cb89ebd");
testOutputParameters("--output_mode EMIT_ALL_SITES", "23ab7f15a01dd6dbf9f09a7560a2055b");
}
private void testOutputParameters(final String args, final String md5) {
@ -193,7 +194,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("afdba62ff773ee77dd5ec947f7cf280f"));
Arrays.asList("87c55fece67a562d208c538868307d7b"));
executeTest("test confidence 1", spec1);
}
@ -201,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("d81007a1718d2e16c2d8cd5bbc0d7bf3"));
Arrays.asList("87c55fece67a562d208c538868307d7b"));
executeTest("test confidence 2", spec2);
}
@ -212,12 +213,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "7f3fcbe491284b321d6b92ef197644c3" );
testHeterozosity( 0.01, "481b17c5541f758a49f84263e5b0f795" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "04d970a174dcfaccab58f2943326251d" );
testHeterozosity( 1.0 / 1850, "70ad4b50a22de917eb91a95ca191eb17" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -241,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("ba5b511efd3d99575620f14ba2ba259e"));
Arrays.asList("3d20dbf7912e49cdfa929eb04840d351"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -260,7 +261,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("4c71fbe45faf6e2b7da0eb8ae9dd0c0f"));
Arrays.asList("0b141419428831b598813272cb7af055"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -279,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("e6c116225319f505d680beeeb2063bf1"));
Arrays.asList("9a54f9f820efa74e5a719e5ca44bc04d"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -294,7 +295,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("d46e1f465c649927fb3c4ec85df35d09"));
Arrays.asList("7f6c4e55b8e77c19199e8ad8b3594280"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -307,7 +308,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("0b9d7998f222e55e82c1a7022d62a508"));
Arrays.asList("7ef98a593945f0269ac2d29982a2a72b"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -317,7 +318,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("9bd02b2c648695138f2645e955bf4d8d"));
Arrays.asList("656185ebade2db034441c787d6a363c1"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -327,7 +328,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("59e874d76e42eafd98ad961eb70706bc"));
Arrays.asList("eb9624642e814a0b8962acc89422be23"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -335,13 +336,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("e84f82e12deb9773dae21595b3531a07"));
Arrays.asList("e7b471d2a0eada2c7f37f120f2f1fa88"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("b4df2bf0d820c6fc11fabcafe18bb769"));
Arrays.asList("5c7db047ae9417d37c6bbda1d8ea6019"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -351,7 +352,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
Arrays.asList("95226301a014347efc90e5f750a0db60"));
Arrays.asList("e362dc0488c8ee3013fa636d929db688"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
}
@ -384,7 +385,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("a3ea0eea74f2031ebb2ea0edfa14c945"));
Arrays.asList("3c9786453eb59013c70d99ee74f957a9"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -392,7 +393,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("59c8f66eadd45c56f09291bf64f611e1"));
Arrays.asList("72b82f04dd7f9b9318ef7f8604f8085a"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -400,7 +401,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("c1911f6ede7b4e8e83209ead66329596"));
Arrays.asList("0e9f485edabbed613e50c699cfa8822f"));
executeTest("test minIndelFraction 1.0", spec);
}
}

View File

@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
return "-T ReadBackedPhasing" +
" -R " + reference +
" -I " + validationDataLocation + reads +
" --variant " + validationDataLocation + VCF +
" --variant " + ( VCF.contains("phasing_test") ? testDir : validationDataLocation) + VCF +
" --cacheWindowSize " + cacheWindowSize +
" --maxPhaseSites " + maxPhaseSites +
" --phaseQualityThresh " + phaseQualityThresh +
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:332341-382503",
1,
Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba"));
Arrays.asList("442c819569417c1b7d6be9f41ce05394"));
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
}
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:1232503-1332503",
1,
Arrays.asList("f7517896c899a872c24d8e823ac9deae"));
Arrays.asList("2a51ee7d3c024f2410dcee40c5412993"));
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
}
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
+ " -L chr20:332341-382503",
1,
Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c"));
Arrays.asList("85bc9b03e24159f746dbd0cb988f9ec8"));
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
}
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
+ " -L chr20:332341-382503",
1,
Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59"));
Arrays.asList("96bb413a83c777ebbe622438e4565e8f"));
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
}
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
+ " -L chr20:332341-482503",
1,
Arrays.asList("6163a1fba27532da77765a7a11c55332"));
Arrays.asList("7d2402f055d243e2208db9ea47973e13"));
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
}
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:652810-681757",
1,
Arrays.asList("94f2fc24c3ac1ddbecb2e0bf7ed1597c"));
Arrays.asList("72682b3f27c33580d2d4515653ba6de7"));
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
}

View File

@ -271,7 +271,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" -knownSites:anyNameABCD,VCF3 " + testDir + "vcfexample3.vcf" +
" -knownSites:anyNameABCD,VCF " + testDir + "vcfexample3.vcf" +
" -T CountCovariates" +
" -I " + bam +
" -knownSites " + b36dbSNP129 +

View File

@ -133,7 +133,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -o %s" +
" -tranchesFile " + testDir + "VQSR.mixedTest.tranches" +
" -recalFile " + testDir + "VQSR.mixedTest.recal",
Arrays.asList("1370d7701a6231633d43a8062b7aff7f"));
Arrays.asList("beadf841bbf39c3f0d0bc7fb55462b37"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
}

View File

@ -72,7 +72,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
public void combinePLs(String file1, String file2, String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 " + validationDataLocation + file2,
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + testDir + file1 + " -V:v2 " + testDir + file2,
1,
Arrays.asList(md5));
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
@ -120,8 +120,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
String file2 = "combine.2.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b37KGReference
+ " -V:one " + validationDataLocation + file1
+ " -V:two " + validationDataLocation + file2 + args,
+ " -V:one " + testDir + file1
+ " -V:two " + testDir + file2 + args,
1,
Arrays.asList(md5));
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);

View File

@ -40,14 +40,14 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
Arrays.asList("b8f4171b0c39954b283dfed4afed87d7"));
executeTest("test b36 to hg19", spec);
}
@Test
public void testb36Tohg19UnsortedSamples() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010"));
executeTest("test b36 to hg19, unsorted samples", spec);
@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + testDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b"));
Arrays.asList("e0b813ff873185ab51995a151f80ec98"));
executeTest("test hg18 to hg19, unsorted", spec);
}
}

View File

@ -122,7 +122,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("9162a67ccb4201c0542f30d14967f2d5")
Arrays.asList("2a0436eecc2bc29fe559e4d1b9e13580")
);
executeTest("testUsingDbsnpName--" + testFile, spec);

View File

@ -35,7 +35,7 @@ public class VCFIntegrationTest extends WalkerTest {
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("d2604faad0613932453395c54cc68369"));
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("355b029487c3b4c499140d71310ca37e"));
executeTest("Test reading and writing breakpoint VCF", spec1);
}

View File

@ -370,6 +370,35 @@ public class VariantContextTestProvider {
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
}
//
//
// TESTING PHASE
//
//
final Genotype gUnphased = new GenotypeBuilder("gUnphased", Arrays.asList(ref, alt1)).make();
final Genotype gPhased = new GenotypeBuilder("gPhased", Arrays.asList(ref, alt1)).phased(true).make();
final Genotype gPhased2 = new GenotypeBuilder("gPhased2", Arrays.asList(alt1, alt1)).phased(true).make();
final Genotype gPhased3 = new GenotypeBuilder("gPhased3", Arrays.asList(ref, ref)).phased(true).make();
final Genotype haploidNoPhase = new GenotypeBuilder("haploidNoPhase", Arrays.asList(ref)).make();
addGenotypeTests(site, gUnphased, gPhased);
addGenotypeTests(site, gUnphased, gPhased2);
addGenotypeTests(site, gUnphased, gPhased3);
addGenotypeTests(site, gPhased, gPhased2);
addGenotypeTests(site, gPhased, gPhased3);
addGenotypeTests(site, gPhased2, gPhased3);
addGenotypeTests(site, haploidNoPhase, gPhased);
addGenotypeTests(site, haploidNoPhase, gPhased2);
addGenotypeTests(site, haploidNoPhase, gPhased3);
addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2);
addGenotypeTests(site, haploidNoPhase, gPhased, gPhased3);
addGenotypeTests(site, haploidNoPhase, gPhased2, gPhased3);
addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2, gPhased3);
final Genotype gUnphasedTet = new GenotypeBuilder("gUnphasedTet", Arrays.asList(ref, alt1, ref, alt1)).make();
final Genotype gPhasedTet = new GenotypeBuilder("gPhasedTet", Arrays.asList(ref, alt1, alt1, alt1)).phased(true).make();
addGenotypeTests(site, gUnphasedTet, gPhasedTet);
}
if ( ENABLE_PL_TESTS ) {
@ -484,8 +513,6 @@ public class VariantContextTestProvider {
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
}
// TODO -- test test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
}
private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) {
@ -649,7 +676,6 @@ public class VariantContextTestProvider {
// filters are the same
Assert.assertEquals(actual.getFilters(), expected.getFilters());
Assert.assertEquals(actual.isFiltered(), expected.isFiltered());
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied());
// inline attributes
Assert.assertEquals(actual.getDP(), expected.getDP());