Merge pull request #486 from broadinstitute/rp_fix_missing_annotations_CombineReferenceCalculationVariants

Bug fix for missing annotations in CombineReferenceCalculationVariants. ...
This commit is contained in:
Ryan Poplin 2014-02-05 14:22:59 -05:00
commit 6a7a197362
3 changed files with 21 additions and 44 deletions

View File

@ -69,6 +69,7 @@ import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.*;
@ -201,10 +202,11 @@ public class CombineReferenceCalculationVariants extends RodWalker<VariantContex
if ( combinedVC == null ) throw new IllegalArgumentException("combinedVC cannot be null");
VariantContext result = combinedVC;
final Map<String,Object> originalAttributes = combinedVC.getAttributes();
// only re-genotype polymorphic sites
if ( combinedVC.isVariant() )
result = genotypingEngine.calculateGenotypes(result);
result = new VariantContextBuilder(genotypingEngine.calculateGenotypes(result)).attributes(originalAttributes).make();
// if it turned monomorphic and we don't want such sites, quit
if ( result == null || (!INCLUDE_NON_VARIANTS && result.isMonomorphicInSamples()) )

View File

@ -57,16 +57,28 @@ public class CombineReferenceCalculationVariantsIntegrationTest extends WalkerTe
return "-T CombineReferenceCalculationVariants --no_cmdline_in_header -L 1:1-50,000,000 -o %s -R " + ref + args;
}
// TODO -- enable this test (and create others) once the Haplotype Caller produces appropriate gVCFs (with <NON-REF> for every record)
@Test(enabled = false)
@Test(enabled = true)
public void combineSingleSamplePipelineGVCF() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-10,001,000", b37KGReference),
" -L 20:10,000,000-20,000,000", b37KGReference),
1,
Arrays.asList("0413f0725fc5ec3a4f1ee246f6cb3a2a"));
Arrays.asList("66e3b512de9de64b03c708386736cc2f"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@Test(enabled = true)
public void combineSingleSamplePipelineGVCF_includeNonVariants() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V:sample1 " + privateTestDir + "combine.single.sample.pipeline.1.vcf" +
" -V:sample2 " + privateTestDir + "combine.single.sample.pipeline.2.vcf" +
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -inv -L 20:10,000,000-10,010,000", b37KGReference),
1,
Arrays.asList("de957075796512cb9f333f77515e97d5"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
}

View File

@ -873,7 +873,6 @@ public class GATKVariantContextUtils {
int depth = 0;
int maxAC = -1;
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<>();
final Map<String, List<Comparable>> annotationMap = new LinkedHashMap<>();
double log10PError = CommonInfo.NO_LOG10_PERROR;
boolean anyVCHadFiltersApplied = false;
VariantContext vcWithMaxAC = null;
@ -1063,7 +1062,6 @@ public class GATKVariantContextUtils {
final List<Allele> alleles = getAllelesListFromMapper(refAllele, alleleMapper);
final Map<String, Object> attributes = new LinkedHashMap<>();
final Set<String> inconsistentAttributes = new HashSet<>();
final Set<String> rsIDs = new LinkedHashSet<>(1); // most of the time there's one id
int depth = 0;
@ -1089,7 +1087,7 @@ public class GATKVariantContextUtils {
if ( vc.hasID() ) rsIDs.add(vc.getID());
// add attributes
addReferenceConfidenceAttributes(vc.getAttributes(), attributes, inconsistentAttributes, annotationMap);
addReferenceConfidenceAttributes(vc.getAttributes(), annotationMap);
}
// when combining annotations use the median value from all input VCs which had annotations provided
@ -1161,18 +1159,13 @@ public class GATKVariantContextUtils {
* Adds attributes to the global map from the new context in a sophisticated manner
*
* @param myAttributes attributes to add from
* @param globalAttributes global set of attributes to add to
* @param inconsistentAttributes set of attributes that are inconsistent among samples
* @param annotationMap map of annotations for combining later
*/
private static void addReferenceConfidenceAttributes(final Map<String, Object> myAttributes,
final Map<String, Object> globalAttributes,
final Set<String> inconsistentAttributes,
final Map<String, List<Comparable>> annotationMap) {
for ( final Map.Entry<String, Object> p : myAttributes.entrySet() ) {
final String key = p.getKey();
final Object value = p.getValue();
boolean badAnnotation = false;
// add the annotation values to a list for combining later
List<Comparable> values = annotationMap.get(key);
@ -1188,38 +1181,8 @@ public class GATKVariantContextUtils {
else
values.add(Integer.parseInt(stringValue));
} catch (final NumberFormatException e) {
badAnnotation = true;
// nothing to do
}
// only output annotations that have the same value in every input VC
if ( badAnnotation && ! inconsistentAttributes.contains(key) ) {
checkForConsistency(key, value, globalAttributes, inconsistentAttributes);
}
}
}
/**
* Check attributes for consistency to others in the merge
*
* @param key the attribute key
* @param value the attribute value
* @param globalAttributes the global list of attributes being merged
* @param inconsistentAttributes the list of inconsistent attributes in the merge
*/
private static void checkForConsistency(final String key,
final Object value,
final Map<String, Object> globalAttributes,
final Set<String> inconsistentAttributes) {
final boolean alreadyFound = globalAttributes.containsKey(key);
final Object boundValue = globalAttributes.get(key);
final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
if ( alreadyFound && ! boundValue.equals(value) && ! boundIsMissingValue ) {
// we found the value but we're inconsistent, put it in the exclude list
inconsistentAttributes.add(key);
globalAttributes.remove(key);
} else if ( ! alreadyFound || boundIsMissingValue ) { // no value
globalAttributes.put(key, value);
}
}