Bug fixes and code cleanup throughout

-- chromosomeCounts now takes builder as well, cleaning up a lot of code throughout the codebase.
This commit is contained in:
Mark DePristo 2011-11-19 18:40:15 -05:00
parent 707bd30b3f
commit 7d09c0064b
10 changed files with 125 additions and 58 deletions

View File

@ -59,10 +59,8 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() )
return null;
Map<String, Object> map = new HashMap<String, Object>();
VariantContextUtils.calculateChromosomeCounts(vc, map, true);
return map;
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
}
public List<String> getKeyNames() {

View File

@ -338,23 +338,21 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
builder.alleles(new HashSet<Allele>(Arrays.asList(vc_input.getReference()))).filters(removedFilters);
}
HashMap<String, Object> attributes = new HashMap<String, Object>(vc_input.getAttributes());
// re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(vc_input, attributes, false);
VariantContextUtils.calculateChromosomeCounts(builder, false);
// Get Hapmap AC and AF
if (vc_comp != null) {
attributes.put("ACH", alleleCountH.toString() );
attributes.put("ANH", chrCountH.toString() );
attributes.put("AFH", String.format("%4.2f", (double)alleleCountH/chrCountH) );
builder.attribute("ACH", alleleCountH.toString() );
builder.attribute("ANH", chrCountH.toString() );
builder.attribute("AFH", String.format("%4.2f", (double)alleleCountH/chrCountH) );
}
attributes.put("NumGenotypesChanged", numGenotypesChangedByBeagle );
builder.attribute("NumGenotypesChanged", numGenotypesChangedByBeagle );
if( !beagleR2Feature.getR2value().equals(Double.NaN) ) {
attributes.put("R2", beagleR2Feature.getR2value().toString() );
builder.attribute("R2", beagleR2Feature.getR2value().toString() );
}
builder.attributes(attributes);
vcfWriter.add(builder.make());

View File

@ -106,9 +106,9 @@ public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
return sum;
try {
Map<String, Object> attrs = new HashMap<String, Object>(value.getAttributes());
VariantContextUtils.calculateChromosomeCounts(value, attrs, true);
writer.add(new VariantContextBuilder(value).attributes(attrs).make());
VariantContextBuilder builder = new VariantContextBuilder(value);
VariantContextUtils.calculateChromosomeCounts(builder, true);
writer.add(builder.make());
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -131,13 +131,9 @@ class PhasingUtils {
if ( vc2.hasID() ) mergedIDs.add(vc2.getID());
String mergedID = mergedIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);
VariantContext mergedVc = new VariantContextBuilder(mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()).id(mergedID).genotypes(mergedGenotypes).log10PError(mergedLog10PError).filters(mergedFilters).attributes(mergedAttribs).make();
mergedAttribs = new HashMap<String, Object>(mergedVc.getAttributes());
VariantContextUtils.calculateChromosomeCounts(mergedVc, mergedAttribs, true);
mergedVc = new VariantContextBuilder(mergedVc).attributes(mergedAttribs).make();
return mergedVc;
VariantContextBuilder mergedBuilder = new VariantContextBuilder(mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()).id(mergedID).genotypes(mergedGenotypes).log10PError(mergedLog10PError).filters(mergedFilters).attributes(mergedAttribs);
VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
return mergedBuilder.make();
}
static String mergeVariantContextNames(String name1, String name2) {

View File

@ -279,22 +279,17 @@ public class VariantEvalUtils {
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
VariantContext vcsub = vc.subContextFromSamples(sampleNames, vc.getAlleles());
VariantContextBuilder builder = new VariantContextBuilder(vcsub);
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
final int originalAlleleCount = vc.getHetCount() + 2 * vc.getHomVarCount();
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
newAts.put("ISSINGLETON", true);
builder.attribute("ISSINGLETON", true);
}
VariantContextUtils.calculateChromosomeCounts(vcsub, newAts, true);
vcsub = new VariantContextBuilder(vcsub).attributes(newAts).make();
//VariantEvalWalker.logger.debug(String.format("VC %s subset to %s AC%n", vc.getSource(), vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY)));
return vcsub;
VariantContextUtils.calculateChromosomeCounts(builder, true);
return builder.make();
}
/**

View File

@ -222,7 +222,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
for ( final VariantContext vc : vcs ) {
vcfWriter.add(vc);
}
return vcs.isEmpty() ? 0 : 1;
}
@ -245,18 +245,17 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
for ( VariantContext mergedVC : mergedVCs ) {
for ( VariantContext mergedVC : mergedVCs ) {
// only operate at the start of events
if ( mergedVC == null )
continue;
HashMap<String, Object> attributes = new HashMap<String, Object>(mergedVC.getAttributes());
final VariantContextBuilder builder = new VariantContextBuilder(mergedVC);
// re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(mergedVC, attributes, false);
VariantContext annotatedMergedVC = new VariantContextBuilder(mergedVC).attributes(attributes).make();
VariantContextUtils.calculateChromosomeCounts(builder, false);
if ( minimalVCF )
annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, Arrays.asList(SET_KEY));
vcfWriter.add(annotatedMergedVC);
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
vcfWriter.add(builder.make());
}
return vcs.isEmpty() ? 0 : 1;

View File

@ -684,11 +684,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
builder.attribute("AN_Orig", sub.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
}
Map<String, Object> attributes = new HashMap<String, Object>(builder.make().getAttributes());
VariantContextUtils.calculateChromosomeCounts(sub, attributes, false);
attributes.put("DP", depth);
VariantContextUtils.calculateChromosomeCounts(builder, false);
builder.attribute("DP", depth);
return new VariantContextBuilder(builder.make()).attributes(attributes).make();
return builder.make();
}
private void randomlyAddVariant(int rank, VariantContext vc) {

View File

@ -158,12 +158,34 @@ public class VariantContextBuilder {
@Requires({"key != null"})
@Ensures({"this.attributes.size() == old(this.attributes.size()) || this.attributes.size() == old(this.attributes.size()+1)"})
public VariantContextBuilder attribute(final String key, final Object value) {
makeAttributesModifiable();
attributes.put(key, value);
return this;
}
/**
* Removes key if present in the attributes
*
* @param key
* @return
*/
@Requires({"key != null"})
@Ensures({"this.attributes.size() == old(this.attributes.size()) || this.attributes.size() == old(this.attributes.size()-1)"})
public VariantContextBuilder rmAttribute(final String key) {
makeAttributesModifiable();
attributes.remove(key);
return this;
}
/**
* Makes the attributes field modifiable. In many cases attributes is just a pointer to an immutable
* collection, so methods that want to add / remove records require the attributes to be copied first
*/
private void makeAttributesModifiable() {
if ( ! attributesCanBeModified ) {
this.attributesCanBeModified = true;
this.attributes = new HashMap<String, Object>(attributes);
}
attributes.put(key, value);
return this;
}
/**

View File

@ -55,13 +55,15 @@ public class VariantContextUtils {
}
/**
* Update the attributes of the attributes map given the VariantContext to reflect the proper chromosome-based VCF tags
* Update the attributes of the attributes map given the VariantContext to reflect the
* proper chromosome-based VCF tags
*
* @param vc the VariantContext
* @param attributes the attributes map to populate; must not be null; may contain old values
* @param removeStaleValues should we remove stale values from the mapping?
* @return the attributes map provided as input, returned for programming convenience
*/
public static void calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
// if everyone is a no-call, remove the old attributes if requested
if ( vc.getCalledChrCount() == 0 && removeStaleValues ) {
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) )
@ -70,7 +72,7 @@ public class VariantContextUtils {
attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) )
attributes.remove(VCFConstants.ALLELE_NUMBER_KEY);
return;
return attributes;
}
if ( vc.hasGenotypes() ) {
@ -96,6 +98,54 @@ public class VariantContextUtils {
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
}
}
return attributes;
}
/**
* Update the attributes of the attributes map in the VariantContextBuilder to reflect the proper
* chromosome-based VCF tags based on the current VC produced by builder.make()
*
* @param builder the VariantContextBuilder we are updating
* @param removeStaleValues should we remove stale values from the mapping?
*/
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) {
final VariantContext vc = builder.make();
// if everyone is a no-call, remove the old attributes if requested
if ( vc.getCalledChrCount() == 0 && removeStaleValues ) {
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
builder.rmAttribute(VCFConstants.ALLELE_COUNT_KEY);
if ( vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
builder.rmAttribute(VCFConstants.ALLELE_FREQUENCY_KEY);
if ( vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
builder.rmAttribute(VCFConstants.ALLELE_NUMBER_KEY);
return;
}
if ( vc.hasGenotypes() ) {
builder.attribute(VCFConstants.ALLELE_NUMBER_KEY, vc.getCalledChrCount());
// if there are alternate alleles, record the relevant tags
if ( vc.getAlternateAlleles().size() > 0 ) {
ArrayList<String> alleleFreqs = new ArrayList<String>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
double totalChromosomes = (double)vc.getCalledChrCount();
for ( Allele allele : vc.getAlternateAlleles() ) {
int altChromosomes = vc.getCalledChrCount(allele);
alleleCounts.add(altChromosomes);
String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes));
alleleFreqs.add(freq);
}
builder.attribute(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
}
else {
builder.attribute(VCFConstants.ALLELE_COUNT_KEY, 0);
builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
}
}
}
private static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
@ -348,10 +398,6 @@ public class VariantContextUtils {
return r;
}
public static VariantContext pruneVariantContext(VariantContext vc) {
return pruneVariantContext(vc, null);
}
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
for ( final String key : keysToPreserve ) {
@ -361,7 +407,19 @@ public class VariantContextUtils {
return attributes;
}
/**
* @deprecated use variant context builder version instead
* @param vc
* @param keysToPreserve
* @return
*/
@Deprecated
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
}
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
final VariantContext vc = builder.make();
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
// VC info
@ -375,7 +433,7 @@ public class VariantContextUtils {
genotypeAttributes, g.isPhased()));
}
return new VariantContextBuilder(vc).genotypes(genotypes).attributes(attributes).make();
return builder.genotypes(genotypes).attributes(attributes);
}
public enum GenotypeMergeType {

View File

@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9beb795536e95954f810835c6058f2ad"));
Arrays.asList("fbb656369eaa48153d127bd12db59d8f"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -54,9 +54,11 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testNoAnnotsNotAsking2() {
// this genotype annotations in this file are actually out of order. If you don't parse the genotypes
// they don't get reordered. It's a good test of the genotype ordering system.
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac"));
Arrays.asList("0cc0ec59f0328792e6413b6ff3f71780"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@ -64,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("49d989f467b8d6d8f98f7c1b67cd4a05"));
Arrays.asList("42dd979a0a931c18dc9be40308bac321"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -80,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("33062eccd6eb73bc49440365430454c4"));
Arrays.asList("477eac07989593b58bb361f3429c085a"));
executeTest("test exclude annotations", spec);
}