Bugfixes on the way to passing integration tests

-- Replaced getAttributes with getDP() and not the old style getAttribute, where appropriate
-- Added getAnyAttribute and hasAnyAttribute that actually does the expensive work of seeing if the key is something like GT, AD or another inline datum, and returns it.  Very expensive but convenient.
-- Fixed nasty subsetting bug in SelectVariants with excluding samples
-- Generalized VariantsToTable to work with new inline attributes (using getAnyAttribute) as well as GT
-- Bugfix for dropping old style GL field values
-- Added test to VCFWriter to ensure that we have the sample number of samples in the VC as in the header
-- Bugfix for Allele.getBaseString to properly show NO_CALL alleles
-- getGenotypeString in Genotype returns "NA" instead of null for ploidy == 0 genotypes
This commit is contained in:
Mark DePristo 2012-06-13 13:29:17 -04:00
parent ea1b699778
commit 31997f8092
14 changed files with 95 additions and 28 deletions

View File

@ -807,9 +807,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute("AD"),phasedMother.getLikelihoodsString(),
phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS),phasedChild.getLikelihoodsString());
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),
phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString());
if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
}
@ -819,8 +819,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getAttribute(VCFConstants.DEPTH_KEY),phasedMother.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS),phasedMother.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS),phasedChild.getLikelihoodsString());
phasedMother.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
}
}
else{
@ -830,8 +830,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s",
vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(),
phasedFather.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getAttribute(VCFConstants.DEPTH_KEY),phasedFather.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getAttribute(VCFConstants.DEPTH_KEY),phasedChild.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS),phasedChild.getLikelihoodsString());
phasedFather.getAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
}
//Report violation if set so

View File

@ -241,7 +241,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
// update transition / transversion ratio
if ( titvTable != null ) titvTable.inc(type, g.getSampleName());
if ( g.hasAttribute(VCFConstants.DEPTH_KEY) )
if ( g.hasDP() )
depthPerSample.inc(type, g.getSampleName());
}
}

View File

@ -401,6 +401,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
Collection<String> XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles);
samples.removeAll(XLsamplesFromFile);
samples.removeAll(XLsampleNames);
NO_SAMPLES_SPECIFIED = NO_SAMPLES_SPECIFIED && XLsampleNames.isEmpty();
if ( samples.size() == 0 && !NO_SAMPLES_SPECIFIED )
throw new UserException("All samples requested to be included were also requested to be excluded.");

View File

@ -288,8 +288,8 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
private byte getStandardEncoding(Genotype g, int offset) {
byte b;
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
b = NO_CALL;
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
b = NO_CALL;
} else if ( g.isHomRef() ) {
b = HOM_REF;
} else if ( g.isHomVar() ) {
@ -305,7 +305,7 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
private byte getFlippedEncoding(Genotype g, int offset) {
byte b;
if ( g.hasAttribute(VCFConstants.GENOTYPE_QUALITY_KEY) && ((Integer) g.getAttribute(VCFConstants.GENOTYPE_QUALITY_KEY)) < minGenotypeQuality ) {
if ( g.hasGQ() && g.getGQ() < minGenotypeQuality ) {
b = NO_CALL;
} else if ( g.isHomRef() ) {
b = HOM_VAR;

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -314,8 +315,12 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
if ( addGenotypeFields ) {
for ( final String sample : samples ) {
for ( final String gf : genotypeFields ) {
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAttribute(gf) )
addFieldValue(vc.getGenotype(sample).getAttribute(gf), records);
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAnyAttribute(gf) ) {
if ( gf.equals(VCFConstants.GENOTYPE_KEY) )
addFieldValue(vc.getGenotype(sample).getGenotypeString(true), records);
else
addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records);
}
else
addFieldValue(MISSING_DATA, records);
}

View File

@ -782,6 +782,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
gb.AD(decodeInts(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
gb.PL(decodeInts(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
gb.PL(GenotypeLikelihoods.fromGLField(GTValueArray[i]).getAsPLs());
} else if (gtKey.equals(VCFConstants.DEPTH_KEY)) {
gb.DP(Integer.valueOf(GTValueArray[i]));
} else {

View File

@ -336,7 +336,7 @@ public class Allele implements Comparable<Allele> {
*
* @return the segregating bases
*/
public String getBaseString() { return new String(getBases()); }
public String getBaseString() { return isNoCall() ? NO_CALL_STRING : new String(getBases()); }
/**
* Return the printed representation of this allele.

View File

@ -325,7 +325,7 @@ public abstract class Genotype implements Comparable<Genotype> {
@Ensures("result != null || ! isAvailable()")
public String getGenotypeString(boolean ignoreRefState) {
if ( getPloidy() == 0 )
return null;
return "NA";
// Notes:
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
@ -419,7 +419,7 @@ public abstract class Genotype implements Comparable<Genotype> {
* @param key a non-null string key to check for an association
* @return true if key has a value in the extendedAttributes
*/
@Requires("key != null")
@Requires({"key != null", "! isForbiddenKey(key)"})
public boolean hasAttribute(final String key) {
return getExtendedAttributes().containsKey(key);
}
@ -431,7 +431,7 @@ public abstract class Genotype implements Comparable<Genotype> {
* @param defaultValue the value to return if key isn't in the extended attributes
* @return a value (potentially) null associated with key, or defaultValue if no association exists
*/
@Requires("key != null")
@Requires({"key != null", "! isForbiddenKey(key)"})
@Ensures("hasAttribute(key) || result == defaultValue")
public Object getAttribute(final String key, final Object defaultValue) {
return hasAttribute(key) ? getExtendedAttributes().get(key) : defaultValue;
@ -490,6 +490,46 @@ public abstract class Genotype implements Comparable<Genotype> {
return Double.valueOf((String)x); // throws an exception if this isn't a string
}
/**
* A totally generic getter, that allows you to specific keys that correspond
* to even inline values (GQ, for example). Can be very expensive. Additionally,
* all int[] are converted inline into List<Integer> for convenience.
*
* @param key
* @return
*/
public Object getAnyAttribute(final String key) {
if (key.equals(VCFConstants.GENOTYPE_KEY)) {
return getAlleles();
} else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
return getGQ();
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
return Arrays.asList(getAD());
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
return Arrays.asList(getPL());
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
return getDP();
} else {
return getAttribute(key);
}
}
public boolean hasAnyAttribute(final String key) {
if (key.equals(VCFConstants.GENOTYPE_KEY)) {
return isAvailable();
} else if (key.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
return hasGQ();
} else if (key.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
return hasAD();
} else if (key.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {
return hasPL();
} else if (key.equals(VCFConstants.DEPTH_KEY)) {
return hasDP();
} else {
return hasAttribute(key);
}
}
// TODO -- add getAttributesAsX interface here
// ------------------------------------------------------------------------------
@ -567,4 +607,8 @@ public abstract class Genotype implements Comparable<Genotype> {
return true;
return false;
}
protected final static boolean isForbiddenKey(final String key) {
return PRIMARY_KEYS.contains(key);
}
}

View File

@ -72,8 +72,8 @@ public class SlowGenotype extends Genotype {
// Useful methods for getting genotype likelihoods for a genotype object, if present
//
@Override public boolean hasLikelihoods() {
return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
(hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
return (commonInfo.hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
(commonInfo.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !commonInfo.getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
}
@Override public GenotypeLikelihoods getLikelihoods() {
@ -87,7 +87,7 @@ public class SlowGenotype extends Genotype {
}
private GenotypeLikelihoods getLikelihoods(String key, boolean asPL) {
Object x = getAttribute(key);
Object x = commonInfo.getAttribute(key);
if ( x instanceof String ) {
if ( asPL )
return GenotypeLikelihoods.fromPLField((String)x);
@ -145,25 +145,25 @@ public class SlowGenotype extends Genotype {
@Override
public int getDP() {
return getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
return commonInfo.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
}
@Override
public boolean hasDP() {
return hasAttribute(VCFConstants.DEPTH_KEY);
return commonInfo.hasAttribute(VCFConstants.DEPTH_KEY);
}
@Override
public int[] getAD() {
if ( hasAD() ) {
return (int[])getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
return (int[])commonInfo.getAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
} else
return null;
}
@Override
public boolean hasAD() {
return hasAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
return commonInfo.hasAttribute(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
}
@Override

View File

@ -192,7 +192,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
infoMap.put("isHomRef", g.isHomRef() ? "1" : "0");
infoMap.put("isHet", g.isHet() ? "1" : "0");
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getPhredScaledQual());
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ());
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
infoMap.put(e.getKey(), e.getValue());

View File

@ -329,6 +329,8 @@ class VCFWriter extends IndexingVariantContextWriter {
*/
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
throws IOException {
if ( mHeader.getGenotypeSamples().size() != vc.getNSamples() )
throw new ReviewedStingException("BUG: number of VariantContext samples " + vc.getNSamples() + " != to the number of sample found in the VCF header" + mHeader.getGenotypeSamples().size());
for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);

View File

@ -47,7 +47,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest {
@Test
public void testb36Tohg19UnsortedSamples() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + testDir + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
"-T LiftoverVariants -o %s -R " + b36KGReference + " --variant " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict",
1,
Arrays.asList("07d1bf52125d1f9a25e260e13ec7b010"));
executeTest("test b36 to hg19, unsorted samples", spec);

View File

@ -56,7 +56,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants" +
" -R " + b36KGReference +
" --variant,storage=STREAM " + tmpFifo.getAbsolutePath() +
" --variant:VCF,storage=STREAM " + tmpFifo.getAbsolutePath() +
" --no_cmdline_in_header " +
" -o %s",
1,
@ -80,7 +80,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
WalkerTestSpec selectTestSpec = new WalkerTestSpec(
"-T SelectVariants" +
" -R " + b36KGReference +
" --variant,storage=STREAM " + testFile +
" --variant:VCF,storage=STREAM " + testFile +
" --no_cmdline_in_header" +
" -select 'QD > 2.0'" +
" -o " + tmpFifo.getAbsolutePath(),

View File

@ -87,6 +87,19 @@ public class VariantsToTableIntegrationTest extends WalkerTest {
executeTest("testGenotypeFields", spec);
}
@Test(enabled = true)
public void testGenotypeFieldsWithInline() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --variant " + testDir + "vcfexample2.vcf" +
" -T VariantsToTable" +
" -GF RD -GF GT -GF GQ" +
" -o %s",
1,
Arrays.asList("29744059742ae71fd6aabd29e5c391fb"));
executeTest("testGenotypeFieldsWithInline", spec);
}
@Test(enabled = true)
public void testMoltenOutput() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(