Finalizing BCF2 mark III commit
-- Moved GENOTYPE_KEY vcf header line to VCFConstants. This general migration and cleanup is on Eric's plate now -- Updated HC to initialize the annotation engine in an order that allows it to write a proper VCF header. Still doesn't work... -- Updating integration test files. Moved many more files into public/testdata. Updated their headers to all work correctly with new strict VCF header checking. -- Bugfix for TandemRepeatAnnotation that must be unbounded not A count type as it provides info for the REF as well as each alt -- No longer add FALSE values to flag values in VCs in VariantAnnotatorEngine. DB = 0 is never seen in the output VCFs now -- Fixed bug in VCFDiffableReader that didn't differentiate between "." and "PASS" VC filter status -- Unconditionally add lowQual Filter to UG output VCF files as this is in some cases (EMIT_ALL_SITES) used when the previous check said it wouldn't be -- VariantsToVCF now properly writes out the GT FORMAT field -- BCF2 codec explodes when reading symbolic alleles as I literally cannot figure out how to use the allele clipping code. Eric said he and Ami will clean up this whole piece of infrastructure -- Fixed bug in BCF2Codec that wasn't setting the phase field correctly. UnitTested now -- PASS string now added at the end of the BCF2 dictionary after discussion with Heng -- Fixed bug where I was writing out all field values as BigEndian. Now everything is LittleEndian. -- VCFHeader detects the case where a count field has size < 0 (some of our files have count = -1) and throws a UserException -- Cleaned up unused code -- Fixed bug in BCF2 string encoder that wasn't handling the case of an empty list of strings for encoding -- Fixed bug where all samples are no-called in a VC, in which case we (like the VCFwriter) write out no-called diploid genotypes for all samples -- We always write the number of genotype samples into the BCF2 nSamples header. 
How we can have a variable number of samples per record isn't clear to me, as we don't have a map from missing samples to header names... -- Removed old filtersWereAppliedToContext code in VCF as we properly handle unfiltered, filtered, and PASS records internally -- Fastpath function getDisplayBases() in allele that just gives you the raw bytes[] you'd see for an Allele -- Genotype fields no longer differentiate between unfiltered, filtered, and PASS values. Genotype objects are all PASS implicitly, or explicitly filtered. We only write out the FT values if at least one sample is filtered. Removed interface functions and cleaned up code -- Refactored padAllele code from createVariantContextWithPaddedAlleles into the function padAllele so that it actually works. In general, **** NEVER COPY CODE **** if you need to share functionality make a function, that's why they were invented! -- Increased the default number of records to read for DiffObjects to 1M
This commit is contained in:
parent
0c8b830db7
commit
fba7dafa0e
|
|
@ -71,7 +71,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
public static final VCFInfoHeaderLine[] descriptions = {
|
||||
new VCFInfoHeaderLine(STR_PRESENT, 0, VCFHeaderLineType.Flag, "Variant is a short tandem repeat"),
|
||||
new VCFInfoHeaderLine(REPEAT_UNIT_KEY, 1, VCFHeaderLineType.String, "Tandem repeat unit (bases)"),
|
||||
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
||||
new VCFInfoHeaderLine(REPEATS_PER_ALLELE_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Number of times tandem repeat unit is repeated, for each allele (including reference)") };
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(keyNames);
|
||||
|
|
|
|||
|
|
@ -217,11 +217,11 @@ public class VariantAnnotatorEngine {
|
|||
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
|
||||
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType());
|
||||
|
||||
// put the DB key into the INFO field
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null);
|
||||
|
||||
// add the ID if appropriate
|
||||
if ( rsID != null ) {
|
||||
// put the DB key into the INFO field
|
||||
infoAnnotations.put(VCFConstants.DBSNP_KEY, true);
|
||||
|
||||
if ( vc.emptyID() ) {
|
||||
vc = new VariantContextBuilder(vc).id(rsID).make();
|
||||
} else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) {
|
||||
|
|
@ -237,7 +237,8 @@ public class VariantAnnotatorEngine {
|
|||
break;
|
||||
}
|
||||
}
|
||||
infoAnnotations.put(dbSet.getValue(), overlapsComp);
|
||||
if ( overlapsComp )
|
||||
infoAnnotations.put(dbSet.getValue(), overlapsComp);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -97,7 +97,9 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
vcRoot.add("REF", vc.getReference());
|
||||
vcRoot.add("ALT", vc.getAlternateAlleles());
|
||||
vcRoot.add("QUAL", vc.hasLog10PError() ? vc.getLog10PError() * -10 : VCFConstants.MISSING_VALUE_v4);
|
||||
vcRoot.add("FILTER", vc.getFilters());
|
||||
vcRoot.add("FILTER", ! vc.filtersWereApplied() // needs null to differentiate between PASS and .
|
||||
? VCFConstants.MISSING_VALUE_v4
|
||||
: ( vc.getFilters().isEmpty() ? VCFConstants.PASSES_FILTERS_v4 : vc.getFilters()) );
|
||||
|
||||
// add info fields
|
||||
for (Map.Entry<String, Object> attribute : vc.getAttributes().entrySet()) {
|
||||
|
|
|
|||
|
|
@ -272,9 +272,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
// FORMAT fields
|
||||
headerInfo.addAll(getSupportedHeaderStrings());
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
|
||||
// where the filters are used. For example, in emitting all sites the lowQual field is used
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
|
||||
return headerInfo;
|
||||
}
|
||||
|
|
@ -285,7 +285,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
*/
|
||||
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE);
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
|
|
|
|||
|
|
@ -316,7 +316,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
// first, the basic info
|
||||
headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
|
||||
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
headerInfo.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE);
|
||||
|
||||
// FORMAT and INFO fields
|
||||
// headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
|
|
|
|||
|
|
@ -204,16 +204,16 @@ public class VariantEvalUtils {
|
|||
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
|
||||
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
|
||||
final boolean isSingleton = originalAlleleCount == newAlleleCount && newAlleleCount == 1;
|
||||
final boolean hasChrCountAnnotations = vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
|
||||
vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
|
||||
vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);
|
||||
final boolean hasChrCountAnnotations = vcsub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) &&
|
||||
vcsub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) &&
|
||||
vcsub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY);
|
||||
|
||||
if ( ! isSingleton && hasChrCountAnnotations ) {
|
||||
// nothing to update
|
||||
return vc;
|
||||
return vcsub;
|
||||
} else {
|
||||
// have to do the work
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
|
||||
|
||||
if ( isSingleton )
|
||||
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
.attribute("OriginalStart", fromInterval.getStart()).make();
|
||||
}
|
||||
|
||||
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
|
||||
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
|
||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
||||
|
|
|
|||
|
|
@ -222,6 +222,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
//hInfo.add(new VCFHeaderLine("source", "VariantsToVCF"));
|
||||
//hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getID()));
|
||||
|
||||
hInfo.add(VCFConstants.GENOTYPE_KEY_HEADER_LINE);
|
||||
allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( VCFHeaderLine field : hInfo ) {
|
||||
if ( field instanceof VCFFormatHeaderLine) {
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
|
|
@ -334,7 +335,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
*/
|
||||
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
final ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
||||
return clippedAlleles;
|
||||
} else
|
||||
|
|
@ -355,14 +356,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
String ref = null;
|
||||
|
||||
for ( int i = 0; i < nAlleles; i++ ) {
|
||||
final String allele = (String)decoder.decodeTypedValue();
|
||||
final String alleleBases = (String)decoder.decodeTypedValue();
|
||||
|
||||
if ( i == 0 ) {
|
||||
ref = allele;
|
||||
alleles.add(Allele.create(allele, true));
|
||||
} else {
|
||||
alleles.add(Allele.create(allele, false));
|
||||
}
|
||||
final boolean isRef = i == 0;
|
||||
final Allele allele = Allele.create(alleleBases, isRef);
|
||||
if ( isRef ) ref = alleleBases;
|
||||
|
||||
alleles.add(allele);
|
||||
|
||||
if ( allele.isSymbolic() )
|
||||
throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
|
||||
}
|
||||
assert ref != null;
|
||||
|
||||
|
|
|
|||
|
|
@ -169,6 +169,9 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
gb.alleles(gt);
|
||||
}
|
||||
|
||||
final boolean phased = (a1 & 0x01) == 1;
|
||||
gb.phased(phased);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -199,6 +202,8 @@ public class BCF2GenotypeFieldDecoders {
|
|||
gt.add(getAlleleFromEncoded(siteAlleles, encode));
|
||||
|
||||
gb.alleles(gt);
|
||||
final boolean phased = (encoded[0] & 0x01) == 1;
|
||||
gb.phased(phased);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,18 +82,27 @@ public final class BCF2Utils {
|
|||
@Requires("header != null")
|
||||
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||
final Set<String> dict = new TreeSet<String>();
|
||||
final Set<String> seen = new HashSet<String>();
|
||||
final ArrayList<String> dict = new ArrayList<String>();
|
||||
|
||||
boolean sawPASS = false;
|
||||
// set up the strings dictionary
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFIDHeaderLine) {
|
||||
VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
dict.add(idLine.getID());
|
||||
final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
if ( ! seen.contains(idLine.getID())) {
|
||||
sawPASS = sawPASS || idLine.getID().equals(VCFConstants.PASSES_FILTERS_v4);
|
||||
dict.add(idLine.getID());
|
||||
seen.add(idLine.getID());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new ArrayList<String>(dict);
|
||||
|
||||
if ( ! sawPASS )
|
||||
dict.add(VCFConstants.PASSES_FILTERS_v4); // special case the special PASS field
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
@Requires({"nElements >= 0", "type != null"})
|
||||
|
|
@ -142,25 +151,6 @@ public final class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
return (byte)(readByte(stream));
|
||||
} case 2: {
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
return (short)((b1 << 8) | b2);
|
||||
} case 4: {
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
final int b3 = readByte(stream) & 0xFF;
|
||||
final int b4 = readByte(stream) & 0xFF;
|
||||
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
|
||||
} default: throw new ReviewedStingException("Unexpected size during decoding");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Collapse multiple strings into a comma separated list
|
||||
*
|
||||
|
|
@ -299,20 +289,40 @@ public final class BCF2Utils {
|
|||
else return Collections.singletonList(o);
|
||||
}
|
||||
|
||||
|
||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
return (byte)(readByte(stream));
|
||||
} case 2: {
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
return (short)((b1 << 8) | b2);
|
||||
} case 4: {
|
||||
final int b4 = readByte(stream) & 0xFF;
|
||||
final int b3 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
|
||||
} default: throw new ReviewedStingException("Unexpected size during decoding");
|
||||
}
|
||||
}
|
||||
|
||||
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||
switch ( type.getSizeInBytes() ) {
|
||||
case 1:
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 2:
|
||||
encodeStream.write((0x00FF & value));
|
||||
encodeStream.write((0xFF00 & value) >> 8);
|
||||
encodeStream.write(0xFF & value);
|
||||
break;
|
||||
case 4:
|
||||
encodeStream.write((0xFF000000 & value) >> 24);
|
||||
encodeStream.write((0x00FF0000 & value) >> 16);
|
||||
encodeStream.write((0x0000FF00 & value) >> 8);
|
||||
encodeStream.write((0x000000FF & value));
|
||||
encodeStream.write((0x0000FF00 & value) >> 8);
|
||||
encodeStream.write((0x00FF0000 & value) >> 16);
|
||||
encodeStream.write((0xFF000000 & value) >> 24);
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("BUG: unexpected type size " + type);
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.vcf;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
|
|
@ -154,6 +155,10 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
count = Integer.valueOf(numberStr);
|
||||
|
||||
}
|
||||
|
||||
if ( count < 0 && countType == VCFHeaderLineCount.INTEGER )
|
||||
throw new UserException.MalformedVCFHeader("Count < 0 for fixed size VCF header field " + name);
|
||||
|
||||
try {
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
} catch (Exception e) {
|
||||
|
|
|
|||
|
|
@ -117,4 +117,9 @@ public final class VCFConstants {
|
|||
public static final int MAX_GENOTYPE_QUAL = 99;
|
||||
|
||||
public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare
|
||||
|
||||
//
|
||||
// VCF header line constants
|
||||
//
|
||||
public static final VCFFormatHeaderLine GENOTYPE_KEY_HEADER_LINE = new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype");
|
||||
}
|
||||
|
|
@ -347,6 +347,15 @@ public class Allele implements Comparable<Allele> {
|
|||
*/
|
||||
public String getDisplayString() { return new String(bases); }
|
||||
|
||||
/**
|
||||
* Same as #getDisplayString() but returns the result as byte[].
|
||||
*
|
||||
* Slightly faster then getDisplayString()
|
||||
*
|
||||
* @return the allele string representation
|
||||
*/
|
||||
public byte[] getDisplayBases() { return bases; }
|
||||
|
||||
/**
|
||||
* @param other the other allele
|
||||
*
|
||||
|
|
|
|||
|
|
@ -156,11 +156,6 @@ public final class FastGenotype extends Genotype {
|
|||
return (List<String>) getExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean filtersWereApplied() {
|
||||
return hasExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
}
|
||||
|
||||
@Override public int[] getPL() {
|
||||
return PL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -451,7 +451,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"})
|
||||
@Ensures({"result != null"})
|
||||
public abstract List<String> getFilters();
|
||||
|
||||
@Ensures({"result != getFilters().isEmpty()"})
|
||||
|
|
@ -459,9 +459,6 @@ public abstract class Genotype implements Comparable<Genotype> {
|
|||
return ! getFilters().isEmpty();
|
||||
}
|
||||
|
||||
@Ensures("result == true || getFilters().isEmpty()")
|
||||
public abstract boolean filtersWereApplied();
|
||||
|
||||
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
|
||||
@Deprecated public double getLog10PError() { return getGQ() / -10.0; }
|
||||
@Deprecated public int getPhredScaledQual() { return getGQ(); }
|
||||
|
|
|
|||
|
|
@ -383,7 +383,8 @@ public final class GenotypeBuilder {
|
|||
*/
|
||||
@Requires("filters != null")
|
||||
public GenotypeBuilder filters(final List<String> filters) {
|
||||
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
|
||||
if ( ! filters.isEmpty() )
|
||||
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,6 @@ public class SlowGenotype extends Genotype {
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Override public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
|
||||
@Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
|
||||
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
|
||||
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }
|
||||
|
||||
|
|
|
|||
|
|
@ -339,7 +339,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( ! rederiveAllelesFromGenotypes && sampleNames.containsAll(getSampleNames()) ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) ) {
|
||||
return this; // fast path when you don't have any work to do
|
||||
} else {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
|
|
@ -559,7 +559,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
|
||||
public String getAlleleStringWithRefPadding(final Allele allele) {
|
||||
if ( VariantContextUtils.needsPadding(this) )
|
||||
return VariantContextUtils.padAllele(this, allele);
|
||||
return VariantContextUtils.padAllele(this, allele).getDisplayString();
|
||||
else
|
||||
return allele.getDisplayString();
|
||||
}
|
||||
|
|
@ -1177,8 +1177,9 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
// if ( getType() == Type.INDEL ) {
|
||||
// if ( getReference().length() != (getLocation().size()-1) ) {
|
||||
long length = (stop - start) + 1;
|
||||
if ( (getReference().isNull() && length != 1 ) ||
|
||||
(!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) {
|
||||
if ( ! isSymbolic()
|
||||
&& ((getReference().isNull() && length != 1 )
|
||||
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
|
||||
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
||||
}
|
||||
}
|
||||
|
|
@ -1358,19 +1359,38 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header) {
|
||||
builder.attributes(fullyDecodeAttributes(getAttributes(), header));
|
||||
builder.attributes(fullyDecodeAttributes(getAttributes(), header, false));
|
||||
}
|
||||
|
||||
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes, final VCFHeader header) {
|
||||
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes,
|
||||
final VCFHeader header,
|
||||
final boolean allowMissingValuesComparedToHeader) {
|
||||
final Map<String, Object> newAttributes = new HashMap<String, Object>(attributes.size());
|
||||
|
||||
for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) {
|
||||
final String field = attr.getKey();
|
||||
|
||||
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) )
|
||||
continue; // gross, FT is part of the extended attributes
|
||||
|
||||
final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field);
|
||||
final Object decoded = decodeValue(field, attr.getValue(), format);
|
||||
|
||||
if ( decoded != null )
|
||||
if ( decoded != null ) {
|
||||
if ( ! allowMissingValuesComparedToHeader
|
||||
&& format.getCountType() != VCFHeaderLineCount.UNBOUNDED
|
||||
&& format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements
|
||||
final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1;
|
||||
final int expSize = format.getCount(this.getNAlleles() - 1);
|
||||
if ( obsSize != expSize ) {
|
||||
throw new UserException.MalformedVCFHeader("Discordant field size detected for field " +
|
||||
field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " +
|
||||
"but the header says this should have " + expSize + " values based on header record " +
|
||||
format);
|
||||
}
|
||||
}
|
||||
newAttributes.put(field, decoded);
|
||||
}
|
||||
}
|
||||
|
||||
return newAttributes;
|
||||
|
|
@ -1400,6 +1420,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
} else {
|
||||
return value;
|
||||
}
|
||||
|
||||
// allowMissingValuesComparedToHeader
|
||||
}
|
||||
|
||||
private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) {
|
||||
|
|
@ -1409,7 +1431,12 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
else {
|
||||
switch ( format.getType() ) {
|
||||
case Character: return string;
|
||||
case Flag: return Boolean.valueOf(string);
|
||||
case Flag:
|
||||
final boolean b = Boolean.valueOf(string);
|
||||
if ( b == false )
|
||||
throw new UserException.MalformedVCF("VariantContext FLAG fields " + field + " cannot contain false values"
|
||||
+ " as seen at " + getChr() + ":" + getStart());
|
||||
return b;
|
||||
case String: return string;
|
||||
case Integer: return Integer.valueOf(string);
|
||||
case Float: return Double.valueOf(string);
|
||||
|
|
@ -1430,7 +1457,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) {
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header);
|
||||
final Map<String, Object> map = fullyDecodeAttributes(g.getExtendedAttributes(), header, true);
|
||||
return new GenotypeBuilder(g).attributes(map).make();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -182,17 +182,23 @@ public class VariantContextUtils {
|
|||
return false;
|
||||
}
|
||||
|
||||
public static String padAllele(final VariantContext vc, final Allele allele) {
|
||||
public static Allele padAllele(final VariantContext vc, final Allele allele) {
|
||||
assert needsPadding(vc);
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||
sb.append(allele.getDisplayString());
|
||||
return sb.toString();
|
||||
if ( allele.isSymbolic() )
|
||||
return allele;
|
||||
else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||
sb.append(allele.getDisplayString());
|
||||
final String newBases = sb.toString();
|
||||
return Allele.create(newBases, allele.isReference());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
|
||||
final boolean padVC = needsPadding(inputVC);
|
||||
|
||||
// nothing to do if we don't need to pad bases
|
||||
|
|
@ -200,46 +206,21 @@ public class VariantContextUtils {
|
|||
if ( !inputVC.hasReferenceBaseForIndel() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
||||
|
||||
Byte refByte = inputVC.getReferenceBaseForIndel();
|
||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
|
||||
final Map<Allele, Allele> unpaddedToPadded = new HashMap<Allele, Allele>(inputVC.getNAlleles());
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
|
||||
for (Allele a : inputVC.getAlleles()) {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
} else {
|
||||
String newBases;
|
||||
if ( refBaseShouldBeAppliedToEndOfAlleles )
|
||||
newBases = a.getBaseString() + new String(new byte[]{refByte});
|
||||
else
|
||||
newBases = new String(new byte[]{refByte}) + a.getBaseString();
|
||||
alleles.add(Allele.create(newBases,a.isReference()));
|
||||
}
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
final Allele padded = padAllele(inputVC, a);
|
||||
alleles.add(padded);
|
||||
unpaddedToPadded.put(a, padded);
|
||||
}
|
||||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
|
||||
for (final Genotype g : inputVC.getGenotypes() ) {
|
||||
List<Allele> inAlleles = g.getAlleles();
|
||||
List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
||||
for (Allele a : inAlleles) {
|
||||
if (a.isCalled()) {
|
||||
if (a.isSymbolic()) {
|
||||
newGenotypeAlleles.add(a);
|
||||
} else {
|
||||
String newBases;
|
||||
if ( refBaseShouldBeAppliedToEndOfAlleles )
|
||||
newBases = a.getBaseString() + new String(new byte[]{refByte});
|
||||
else
|
||||
newBases = new String(new byte[]{refByte}) + a.getBaseString();
|
||||
newGenotypeAlleles.add(Allele.create(newBases,a.isReference()));
|
||||
}
|
||||
}
|
||||
else {
|
||||
// add no-call allele
|
||||
newGenotypeAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
||||
for (final Allele a : g.getAlleles()) {
|
||||
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
||||
|
||||
|
|
@ -556,7 +537,7 @@ public class VariantContextUtils {
|
|||
for (final VariantContext vc : prepaddedVCs) {
|
||||
// also a reasonable place to remove filtered calls, if needed
|
||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||
VCs.add(createVariantContextWithPaddedAlleles(vc, false));
|
||||
VCs.add(createVariantContextWithPaddedAlleles(vc));
|
||||
}
|
||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -269,21 +269,6 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Convenience method that just called encodeValue with a no minimum for the number of values.
|
||||
*
|
||||
* Primarily useful for encoding site values
|
||||
*
|
||||
* @param encoder
|
||||
* @param value
|
||||
* @param type
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
|
||||
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encodeValue(encoder, value, type, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Key abstract method that should encode a value of the given type into the encoder.
|
||||
*
|
||||
|
|
@ -348,10 +333,10 @@ public abstract class BCF2FieldEncoder {
|
|||
if ( value == null )
|
||||
return "";
|
||||
else if (value instanceof List) {
|
||||
if ( ((List) value).size() == 1 )
|
||||
return (String)((List) value).get(0);
|
||||
else
|
||||
return BCF2Utils.collapseStringList((List<String>)value);
|
||||
final List<String> l = (List<String>)value;
|
||||
if ( l.isEmpty() ) return "";
|
||||
else if ( l.size() == 1 ) return (String)l.get(0);
|
||||
else return BCF2Utils.collapseStringList(l);
|
||||
} else
|
||||
return (String)value;
|
||||
}
|
||||
|
|
@ -376,7 +361,7 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
|
||||
@Override
|
||||
@Requires("minValues <= 1")
|
||||
@Requires({"minValues <= 1", "value != null", "value instanceof Boolean", "((Boolean)value) == true"})
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
encoder.encodeRawBytes(1, getStaticType());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ public abstract class BCF2FieldWriter {
|
|||
} else {
|
||||
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
|
||||
encoder.encodeType(valueCount, type);
|
||||
getFieldEncoder().encodeOneValue(encoder, rawValue, type);
|
||||
getFieldEncoder().encodeValue(encoder, rawValue, type, valueCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -246,6 +246,10 @@ public abstract class BCF2FieldWriter {
|
|||
buildAlleleMap(vc);
|
||||
nValuesPerGenotype = vc.getMaxPloidy();
|
||||
|
||||
// deal with the case where we have no call everywhere, in which case we write out diploid
|
||||
if ( nValuesPerGenotype == -1 )
|
||||
nValuesPerGenotype = 2;
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
||||
|
|
@ -298,7 +302,6 @@ public abstract class BCF2FieldWriter {
|
|||
if ( nAlleles > 2 ) {
|
||||
// for multi-allelics we need to clear the map, and add additional looks
|
||||
alleleMapForTriPlus.clear();
|
||||
alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
for ( int i = 2; i < alleles.size(); i++ ) {
|
||||
alleleMapForTriPlus.put(alleles.get(i), i);
|
||||
|
|
|
|||
|
|
@ -84,6 +84,7 @@ import java.util.*;
|
|||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
final private static List<Allele> MISSING_GENOTYPE = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
|
||||
private VCFHeader header;
|
||||
|
|
@ -213,7 +214,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
final int nAlleles = vc.getNAlleles();
|
||||
final int nInfo = vc.getAttributes().size();
|
||||
final int nGenotypeFormatFields = getNGenotypeFormatFields(vc);
|
||||
final int nSamples = vc.getNSamples();
|
||||
final int nSamples = header.getNGenotypeSamples();
|
||||
|
||||
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32);
|
||||
encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32);
|
||||
|
|
@ -256,10 +257,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
byte[] s = allele.getBases();
|
||||
for ( Allele allele : vc.getAlleles() ) {
|
||||
if ( needsPadding )
|
||||
s = VariantContextUtils.padAllele(vc,allele).getBytes();
|
||||
allele = VariantContextUtils.padAllele(vc,allele);
|
||||
final byte[] s = allele.getDisplayBases();
|
||||
encoder.encodeTypedString(s);
|
||||
}
|
||||
}
|
||||
|
|
@ -298,7 +299,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
Genotype g = vc.getGenotype(name);
|
||||
if ( g == null )
|
||||
// we don't have any data about g at all
|
||||
g = new GenotypeBuilder(name).make();
|
||||
g = new GenotypeBuilder(name).alleles(MISSING_GENOTYPE).make();
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
writer.done(encoder, vc);
|
||||
|
|
|
|||
|
|
@ -51,9 +51,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
// the VCF header we're storing
|
||||
protected VCFHeader mHeader = null;
|
||||
|
||||
// were filters applied?
|
||||
protected boolean filtersWereAppliedToContext = false;
|
||||
|
||||
final private boolean allowMissingFieldsInHeader;
|
||||
|
||||
private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
|
@ -78,13 +75,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
// note we need to update the mHeader object after this call because they header
|
||||
// may have genotypes trimmed out of it, if doNotWriteGenotypes is true
|
||||
mHeader = writeHeader(header, mWriter, doNotWriteGenotypes, getVersionLine(), getStreamName());
|
||||
|
||||
// determine if we use filters, so we should FORCE pass the records
|
||||
// TODO -- this might not be necessary any longer as we have unfiltered, filtered, and PASS VCs
|
||||
for ( final VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFFilterHeaderLine)
|
||||
filtersWereAppliedToContext = true;
|
||||
}
|
||||
}
|
||||
|
||||
public static final String getVersionLine() {
|
||||
|
|
@ -171,7 +161,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
||||
|
||||
try {
|
||||
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false);
|
||||
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
|
||||
super.add(vc);
|
||||
|
||||
Map<Allele, String> alleleMap = buildAlleleMap(vc);
|
||||
|
|
@ -219,7 +209,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// FILTER
|
||||
String filters = getFilterString(vc, filtersWereAppliedToContext);
|
||||
String filters = getFilterString(vc);
|
||||
mWriter.write(filters);
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
|
|
@ -283,7 +273,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final String getFilterString(final VariantContext vc, boolean forcePASS) {
|
||||
private final String getFilterString(final VariantContext vc) {
|
||||
if ( vc.isFiltered() ) {
|
||||
for ( final String filter : vc.getFilters() )
|
||||
if ( ! mHeader.hasFilterLine(filter) )
|
||||
|
|
@ -291,7 +281,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
|
||||
return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()));
|
||||
}
|
||||
else if ( forcePASS || vc.filtersWereApplied() )
|
||||
else if ( vc.filtersWereApplied() )
|
||||
return VCFConstants.PASSES_FILTERS_v4;
|
||||
else
|
||||
return VCFConstants.UNFILTERED;
|
||||
|
|
@ -407,7 +397,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
|
||||
// some exceptions
|
||||
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
|
||||
}
|
||||
|
||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public class MD5DB {
|
|||
/**
|
||||
* Subdirectory under the ant build directory where we store integration test md5 results
|
||||
*/
|
||||
private static final int MAX_RECORDS_TO_READ = 100000;
|
||||
private static final int MAX_RECORDS_TO_READ = 1000000;
|
||||
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = -1;
|
||||
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
|
||||
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class ReferenceOrderedQueryDataPoolUnitTest extends BaseTest{
|
|||
@Test
|
||||
public void testCloseFilePointers() throws IOException {
|
||||
// Build up query parameters
|
||||
File file = new File(BaseTest.validationDataLocation + "NA12878.hg19.example1.vcf");
|
||||
File file = new File(BaseTest.testDir + "NA12878.hg19.example1.vcf");
|
||||
RMDTriplet triplet = new RMDTriplet("test", "VCF", file.getAbsolutePath(), RMDTriplet.RMDStorageType.FILE, new Tags());
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ import java.util.*;
|
|||
*/
|
||||
public class FeatureManagerUnitTest extends BaseTest {
|
||||
private static final File RANDOM_FILE = new File(testDir + "exampleGATKReport.eval");
|
||||
private static final File VCF3_FILE = new File(testDir + "vcfexample3.vcf");
|
||||
private static final File VCF3_FILE = new File(testDir + "vcf3.vcf");
|
||||
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
|
||||
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
|
||||
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
|
|||
final String chr = "20";
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
|
||||
GenomeLocParser parser = new GenomeLocParser(seq);
|
||||
File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf");
|
||||
File file = new File(testDir + "NA12878.hg19.example1.vcf");
|
||||
VCFCodec codec = new VCFCodec();
|
||||
TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec);
|
||||
CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query(chr, 1, 100000);
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
|
||||
@Test
|
||||
@Test(enabled = false)
|
||||
public void test1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
|
||||
|
|
@ -28,7 +28,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
|||
executeTest("Test symbolic alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false)
|
||||
public void test2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
|
||||
|
|
|
|||
|
|
@ -15,15 +15,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHasAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
|
||||
baseTestString() + " --variant " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("bd6848e7dbf2f809ee2f690ee2cf8ef4"));
|
||||
executeTest("test file has annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasAnnotsNotAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
baseTestString() + " --variant " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("9914bd19f6235c550e5182e0f4591da6"));
|
||||
executeTest("test file has annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
|
@ -31,15 +31,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6a52ef10bb10d72cdd82a8f7afc2dd09"));
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("9084e6c7b1cec0f3a2c6d96711844d5e"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("74d894fd31b449deffca88d0e465f01b"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
|
@ -47,8 +47,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testNoAnnotsNotAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("dd89dfa22f0e1d6760095e04f528d62a"));
|
||||
baseTestString() + " --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b85c1ea28194484b327fbe0add1b5685"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -57,32 +57,32 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
// the genotype annotations in this file are actually out of order. If you don't parse the genotypes
|
||||
// they don't get reordered. It's a good test of the genotype ordering system.
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("542d9ed8290ef7868387af4127e0b5fa"));
|
||||
baseTestString() + " --variant " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("fe4d4e2484c4cf8b1cd50ad42cfe468e"));
|
||||
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b1b32ed3b831c92c94258c8e4a60e8c9"));
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("043fc6205b0633edcd3fadc9e044800c"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("a25eacb0ceea2c082af349f8d7776c8a"));
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("6fafb42d374a67ba4687a23078a126af"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExcludeAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("ef046909a6f6c6cb43653a255a99a014"));
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + testDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("639462a0e0fa79e33def5f011fe55961"));
|
||||
executeTest("test exclude annotations", spec);
|
||||
}
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("5c2fded3b6a96b0b0788086bbb2409ed"));
|
||||
Arrays.asList("ebbf32f5b8b8d22f2eb247a0a3db3da0"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("c590088d85edce786604fd600f5d5e75"));
|
||||
Arrays.asList("afe6c9d3b4b80635a541cdfcfa48db2f"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("ade9354a4cdd6cc92c169f252fb36f3f"));
|
||||
Arrays.asList("21d696ea8c55d2fd4cbb4dcd5f7f7db6"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testMultipleIdsWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + testDir + "vcfexample3withIDs.vcf -L " + testDir + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("f496f40e1e9efa743e3b473f6fe6e6d3"));
|
||||
Arrays.asList("ef95394c14d5c16682a322f3dfb9000c"));
|
||||
executeTest("adding multiple IDs with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + testDir + "fakeHM3.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("d383fbd741d604625c9507d4da1c5a27"));
|
||||
Arrays.asList("e6e276b7d517d57626c8409589cd286f"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -130,23 +130,23 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoQuals() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant " + testDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + testDir + "noQual.vcf -A QualByDepth", 1,
|
||||
Arrays.asList("4a247f039dfb16ac05b38a0dd5f98da6"));
|
||||
Arrays.asList("a99e8315571ed1b6bce942451b3d8612"));
|
||||
executeTest("test file doesn't have QUALs", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("067792efcffea93ade632e52a80d0d8f"));
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -E foo.AF -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("7d6ea3b54210620cbc7e14dad8836bcb"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant:VCF3 " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("66c68deb0508348324eb47d524e756de"));
|
||||
baseTestString() + " --resource:foo " + testDir + "targetAnnotations.vcf -G Standard --variant " + testDir + "vcfexample3empty.vcf -E foo.ID -L " + testDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("35ce4fb0288dfc5c01ec6ce8b14c6157"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
|
||||
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
|
||||
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
|
||||
"-o %s --no_cmdline_in_header", 1, Arrays.asList("cdbf8cc557f5be9ac778e52338c0d906"));
|
||||
"-o %s --no_cmdline_in_header --allowMissingVCFHeaders", 1, Arrays.asList("c5522304abf0633041c7772dd7dafcea"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("test BeagleOutputToVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -50,7 +51,8 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ProduceBeagleInput -R " + hg19Reference + " " +
|
||||
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
|
||||
"-o %s", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
|
||||
"-o %s --allowMissingVCFHeaders", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("test BeagleInput", spec);
|
||||
}
|
||||
|
||||
|
|
@ -59,8 +61,9 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+
|
||||
"--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+
|
||||
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
|
||||
"-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 --allowMissingVCFHeaders -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta --no_cmdline_in_header ",2,
|
||||
Arrays.asList("660986891b30cdc937e0f2a3a5743faa","4b6417f892ccfe5c63b8a60cb0ef3740"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("test BeagleInputWithBootstrap",spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,8 +75,8 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
|
||||
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
|
||||
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
|
||||
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header --allowMissingVCFHeaders",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBeagleChangesSitesToRef",spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,80 +15,80 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testNoAction() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("dfa5dff09fa964b06da19c0f4aff6928"));
|
||||
baseTestString() + " --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("49471b44ac165929d3ff81f98ce19063"));
|
||||
executeTest("test no action", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testClusteredSnps() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -window 10 --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4a4596929f9fe983d8868ca142567781"));
|
||||
baseTestString() + " -window 10 --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("8b45895d7ae1f36b70e7fd26aa9451d3"));
|
||||
executeTest("test clustered SNPs", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMask1() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo --mask:VCF3 " + testDir + "vcfexample2.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("1719462cd17986c33e59e45b69df0270"));
|
||||
baseTestString() + " -maskName foo --mask " + testDir + "vcfexample2.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("06307029f5da87ae4edd9804063a98f9"));
|
||||
executeTest("test mask all", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMask2() {
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("db19ff7d90c82cda09fb3c3878100eb5"));
|
||||
baseTestString() + " -maskName foo --mask:VCF " + testDir + "vcfMask.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("1fd06f6b2642685093ed36342f002b58"));
|
||||
executeTest("test mask some", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMask3() {
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a9e417cba21585c786d4b9930265ea31"));
|
||||
baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + testDir + "vcfMask.vcf --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("d8c5206d5d13477a5929fb1ae5a6bfc4"));
|
||||
executeTest("test mask extend", spec3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4160904b180d1f62a6bf50de6728ce00"));
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a3be095e8aa75d9ef4235b9487527307"));
|
||||
executeTest("test filter #1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("df80db30c7836731ac7c8c3d4fc005b4"));
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("bd1361ddc52d73b8cd7adeb9e5c47200"));
|
||||
executeTest("test filter #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilterWithSeparateNames() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("71ce6c0952831cb68f575aa0173dce2b"));
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("4a43ec0285433df426ab482f88cf7ca6"));
|
||||
executeTest("test filter with separate names #2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypeFilters1() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("179f7f2a90c0e6c656109aac9b775476"));
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("5ee4485a022e163645c08b9691384f67"));
|
||||
executeTest("test genotype filter #1", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypeFilters2() {
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("22e07c27feb9017a130dfb045c5b29b9"));
|
||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant " + testDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("d0a068c8cfb0758d2a8d471383f39b68"));
|
||||
executeTest("test genotype filter #2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testDeletions() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + testDir + "twoDeletions.vcf", 1,
|
||||
Arrays.asList("637256ee5348c1c57f1dadf581b06ed9"));
|
||||
Arrays.asList("a1c02a5a90f1262e9eb3d2cad1fd08f2"));
|
||||
executeTest("test deletions", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.testng.annotations.Test;
|
|||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// ********************************************************************************** //
|
||||
// Note that this class also serves as an integration test for the VariantAnnotator! //
|
||||
|
|
@ -28,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("1c6ea045819b151bcd9d98947c5d4c4d"));
|
||||
Arrays.asList("a4c520b56f85513423c1c0204cabb5e1"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +37,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("f9f2912c63e3253495702099bde5de0f"));
|
||||
Arrays.asList("26ec9db9c7ad4b9a2ef25a8b1cb0d45c"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -44,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("c51d037e0b1cd0ed3a1cd6c6b29646cf"));
|
||||
Arrays.asList("f2624782525929384d9f2c59f3c65529"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -52,7 +53,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("bd7d25f6c6142837e3bc4c0d5dced2ed"));
|
||||
Arrays.asList("a71d4abbad9c31e66aeb21b1fe2cfe9a"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("dba580e8b5e96a28d673b437b4da1c70"));
|
||||
Arrays.asList("2429c0f24da57ab1a1313e807e53e48e"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +69,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("174905e2547e94c3eee07ce84497692b"));
|
||||
Arrays.asList("995c8f57d1f211e004ce81d356a80d16"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("29b15e2017b13e6cb3ad56cc74c719e7"));
|
||||
Arrays.asList("53f60fe15ebffdf85183426b93d48b10"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +87,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "6f14394e90fdacd29390a1f3521f5ca8";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "65846f5a8591d591ffbd1f85afadd9d5";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -107,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "7824468b8290ffb7795a1ec3e493c1a4";
|
||||
String md5 = "36e6c8b0f30b159915eedaa5926ebbad";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -139,7 +140,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("86121f5094f26c8b2e320c1f5dea4ae3"));
|
||||
Arrays.asList("efc4882c1150b246be163e08d81f428f"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -147,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("3712dd35b0e630977c8c5226ccc532ae"));
|
||||
Arrays.asList("b47b08b514acf5e96fb4994754e0e9ce"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -155,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("8a5bb0cca3004848dbca9c08fc2afed9"));
|
||||
Arrays.asList("08db1413ed6a04fcb03d58e3ece9f366"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -163,23 +164,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("9863ecd2576c7a962f9d05a7dc670169"));
|
||||
Arrays.asList("d78f95c225db2a4b21c99a688330df52"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "fe204cef499e5aceb2732ba2e45903ad");
|
||||
testOutputParameters("-sites_only", "1e4a98213ec00479cc090f53620317e4");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "1ab8b68891d1531923a40d594250e8e0");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "5580e3e7c2b358ed416bc03409c54c1d");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "ab179ef6ece3ab9e6b1ff5800cb89ebd");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "23ab7f15a01dd6dbf9f09a7560a2055b");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -193,7 +194,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("afdba62ff773ee77dd5ec947f7cf280f"));
|
||||
Arrays.asList("87c55fece67a562d208c538868307d7b"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -201,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("d81007a1718d2e16c2d8cd5bbc0d7bf3"));
|
||||
Arrays.asList("87c55fece67a562d208c538868307d7b"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -212,12 +213,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "7f3fcbe491284b321d6b92ef197644c3" );
|
||||
testHeterozosity( 0.01, "481b17c5541f758a49f84263e5b0f795" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "04d970a174dcfaccab58f2943326251d" );
|
||||
testHeterozosity( 1.0 / 1850, "70ad4b50a22de917eb91a95ca191eb17" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -241,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("ba5b511efd3d99575620f14ba2ba259e"));
|
||||
Arrays.asList("3d20dbf7912e49cdfa929eb04840d351"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -260,7 +261,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("4c71fbe45faf6e2b7da0eb8ae9dd0c0f"));
|
||||
Arrays.asList("0b141419428831b598813272cb7af055"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -279,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("e6c116225319f505d680beeeb2063bf1"));
|
||||
Arrays.asList("9a54f9f820efa74e5a719e5ca44bc04d"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -294,7 +295,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("d46e1f465c649927fb3c4ec85df35d09"));
|
||||
Arrays.asList("7f6c4e55b8e77c19199e8ad8b3594280"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -307,7 +308,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("0b9d7998f222e55e82c1a7022d62a508"));
|
||||
Arrays.asList("7ef98a593945f0269ac2d29982a2a72b"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -317,7 +318,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("9bd02b2c648695138f2645e955bf4d8d"));
|
||||
Arrays.asList("656185ebade2db034441c787d6a363c1"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -327,7 +328,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("59e874d76e42eafd98ad961eb70706bc"));
|
||||
Arrays.asList("eb9624642e814a0b8962acc89422be23"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -335,13 +336,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("e84f82e12deb9773dae21595b3531a07"));
|
||||
Arrays.asList("e7b471d2a0eada2c7f37f120f2f1fa88"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("b4df2bf0d820c6fc11fabcafe18bb769"));
|
||||
Arrays.asList("5c7db047ae9417d37c6bbda1d8ea6019"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -351,7 +352,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("95226301a014347efc90e5f750a0db60"));
|
||||
Arrays.asList("e362dc0488c8ee3013fa636d929db688"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -384,7 +385,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("a3ea0eea74f2031ebb2ea0edfa14c945"));
|
||||
Arrays.asList("3c9786453eb59013c70d99ee74f957a9"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -392,7 +393,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("59c8f66eadd45c56f09291bf64f611e1"));
|
||||
Arrays.asList("72b82f04dd7f9b9318ef7f8604f8085a"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -400,7 +401,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("c1911f6ede7b4e8e83209ead66329596"));
|
||||
Arrays.asList("0e9f485edabbed613e50c699cfa8822f"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
return "-T ReadBackedPhasing" +
|
||||
" -R " + reference +
|
||||
" -I " + validationDataLocation + reads +
|
||||
" --variant " + validationDataLocation + VCF +
|
||||
" --variant " + ( VCF.contains("phasing_test") ? testDir : validationDataLocation) + VCF +
|
||||
" --cacheWindowSize " + cacheWindowSize +
|
||||
" --maxPhaseSites " + maxPhaseSites +
|
||||
" --phaseQualityThresh " + phaseQualityThresh +
|
||||
|
|
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba"));
|
||||
Arrays.asList("442c819569417c1b7d6be9f41ce05394"));
|
||||
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:1232503-1332503",
|
||||
1,
|
||||
Arrays.asList("f7517896c899a872c24d8e823ac9deae"));
|
||||
Arrays.asList("2a51ee7d3c024f2410dcee40c5412993"));
|
||||
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c"));
|
||||
Arrays.asList("85bc9b03e24159f746dbd0cb988f9ec8"));
|
||||
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59"));
|
||||
Arrays.asList("96bb413a83c777ebbe622438e4565e8f"));
|
||||
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
|
||||
+ " -L chr20:332341-482503",
|
||||
1,
|
||||
Arrays.asList("6163a1fba27532da77765a7a11c55332"));
|
||||
Arrays.asList("7d2402f055d243e2208db9ea47973e13"));
|
||||
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:652810-681757",
|
||||
1,
|
||||
Arrays.asList("94f2fc24c3ac1ddbecb2e0bf7ed1597c"));
|
||||
Arrays.asList("72682b3f27c33580d2d4515653ba6de7"));
|
||||
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -271,7 +271,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" -knownSites:anyNameABCD,VCF3 " + testDir + "vcfexample3.vcf" +
|
||||
" -knownSites:anyNameABCD,VCF " + testDir + "vcfexample3.vcf" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -knownSites " + b36dbSNP129 +
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -tranchesFile " + testDir + "VQSR.mixedTest.tranches" +
|
||||
" -recalFile " + testDir + "VQSR.mixedTest.recal",
|
||||
Arrays.asList("1370d7701a6231633d43a8062b7aff7f"));
|
||||
Arrays.asList("beadf841bbf39c3f0d0bc7fb55462b37"));
|
||||
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
|
||||
public void combinePLs(String file1, String file2, String md5) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 " + validationDataLocation + file2,
|
||||
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + testDir + file1 + " -V:v2 " + testDir + file2,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
|
||||
|
|
@ -120,8 +120,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
String file2 = "combine.2.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants --no_cmdline_in_header -o %s -R " + b37KGReference
|
||||
+ " -V:one " + validationDataLocation + file1
|
||||
+ " -V:two " + validationDataLocation + file2 + args,
|
||||
+ " -V:one " + testDir + file1
|
||||
+ " -V:two " + testDir + file2 + args,
|
||||
1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
|
||||
|
|
|
|||
|
|
@ -40,14 +40,14 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd"));
|
||||
Arrays.asList("b8f4171b0c39954b283dfed4afed87d7"));
|
||||
executeTest("test b36 to hg19", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testb36Tohg19UnsortedSamples() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010"));
|
||||
executeTest("test b36 to hg19, unsorted samples", spec);
|
||||
|
|
@ -58,7 +58,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + testDir + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
|
||||
1,
|
||||
Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b"));
|
||||
Arrays.asList("e0b813ff873185ab51995a151f80ec98"));
|
||||
executeTest("test hg18 to hg19, unsorted", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("9162a67ccb4201c0542f30d14967f2d5")
|
||||
Arrays.asList("2a0436eecc2bc29fe559e4d1b9e13580")
|
||||
);
|
||||
|
||||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("d2604faad0613932453395c54cc68369"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("355b029487c3b4c499140d71310ca37e"));
|
||||
executeTest("Test reading and writing breakpoint VCF", spec1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -370,6 +370,35 @@ public class VariantContextTestProvider {
|
|||
GenotypeBuilder.create("dip", Arrays.asList(ref, alt1)),
|
||||
GenotypeBuilder.create("tet", Arrays.asList(ref, alt1, alt1)));
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
//
|
||||
// TESTING PHASE
|
||||
//
|
||||
//
|
||||
final Genotype gUnphased = new GenotypeBuilder("gUnphased", Arrays.asList(ref, alt1)).make();
|
||||
final Genotype gPhased = new GenotypeBuilder("gPhased", Arrays.asList(ref, alt1)).phased(true).make();
|
||||
final Genotype gPhased2 = new GenotypeBuilder("gPhased2", Arrays.asList(alt1, alt1)).phased(true).make();
|
||||
final Genotype gPhased3 = new GenotypeBuilder("gPhased3", Arrays.asList(ref, ref)).phased(true).make();
|
||||
final Genotype haploidNoPhase = new GenotypeBuilder("haploidNoPhase", Arrays.asList(ref)).make();
|
||||
addGenotypeTests(site, gUnphased, gPhased);
|
||||
addGenotypeTests(site, gUnphased, gPhased2);
|
||||
addGenotypeTests(site, gUnphased, gPhased3);
|
||||
addGenotypeTests(site, gPhased, gPhased2);
|
||||
addGenotypeTests(site, gPhased, gPhased3);
|
||||
addGenotypeTests(site, gPhased2, gPhased3);
|
||||
addGenotypeTests(site, haploidNoPhase, gPhased);
|
||||
addGenotypeTests(site, haploidNoPhase, gPhased2);
|
||||
addGenotypeTests(site, haploidNoPhase, gPhased3);
|
||||
addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2);
|
||||
addGenotypeTests(site, haploidNoPhase, gPhased, gPhased3);
|
||||
addGenotypeTests(site, haploidNoPhase, gPhased2, gPhased3);
|
||||
addGenotypeTests(site, haploidNoPhase, gPhased, gPhased2, gPhased3);
|
||||
|
||||
final Genotype gUnphasedTet = new GenotypeBuilder("gUnphasedTet", Arrays.asList(ref, alt1, ref, alt1)).make();
|
||||
final Genotype gPhasedTet = new GenotypeBuilder("gPhasedTet", Arrays.asList(ref, alt1, alt1, alt1)).phased(true).make();
|
||||
addGenotypeTests(site, gUnphasedTet, gPhasedTet);
|
||||
}
|
||||
|
||||
if ( ENABLE_PL_TESTS ) {
|
||||
|
|
@ -484,8 +513,6 @@ public class VariantContextTestProvider {
|
|||
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
}
|
||||
|
||||
// TODO -- test test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
|
||||
}
|
||||
|
||||
private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) {
|
||||
|
|
@ -649,7 +676,6 @@ public class VariantContextTestProvider {
|
|||
// filters are the same
|
||||
Assert.assertEquals(actual.getFilters(), expected.getFilters());
|
||||
Assert.assertEquals(actual.isFiltered(), expected.isFiltered());
|
||||
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied());
|
||||
|
||||
// inline attributes
|
||||
Assert.assertEquals(actual.getDP(), expected.getDP());
|
||||
|
|
|
|||
Loading…
Reference in New Issue