The VariantContextUtils prune method now keeps genotype attributes as well as INFO keys. RBP now emits far more reduced records (no INFO, only GT:GQ:PQ), further reducing the size of the phasing output

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4882 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-12-20 16:33:14 +00:00
parent 8604335566
commit 60880b925f
2 changed files with 7 additions and 2 deletions

View File

@ -293,7 +293,8 @@ public class VariantContextUtils {
Map<String, Object> d = mvc.getAttributes();
mvc.clearAttributes();
for ( String key : keysToPreserve )
mvc.putAttribute(key, d.get(key));
if ( d.containsKey(key) )
mvc.putAttribute(key, d.get(key));
}
Collection<Genotype> gs = mvc.getGenotypes().values();
@ -301,6 +302,9 @@ public class VariantContextUtils {
for ( Genotype g : gs ) {
MutableGenotype mg = new MutableGenotype(g);
mg.clearAttributes();
for ( String key : keysToPreserve )
if ( g.hasAttribute(key) )
mg.putAttribute(key, g.getAttribute(key));
mvc.addGenotype(mg);
}

View File

@ -241,13 +241,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
return new PhasingStatsAndOutput(phaseStats, completedList);
}
// Attribute keys that survive pruning when a VariantContext is reduced for output;
// passed to VariantContextUtils.pruneVariantContext, every other attribute key is dropped.
private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList("PQ"));
/**
 * Restricts a variant context to the genotypes of the samples being phased and
 * prunes the result so that only the attribute keys listed in
 * {@link #KEYS_TO_KEEP_IN_REDUCED_VCF} remain.
 *
 * @param vc             the variant context to reduce
 * @param samplesToPhase names of the samples whose genotypes are kept
 * @return the pruned sub-context covering only {@code samplesToPhase}
 */
private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
    // for ( String sample : samplesToPhase )
    // logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
    VariantContext subvc = vc.subContextFromGenotypes(vc.getGenotypes(samplesToPhase).values());
    // logger.debug("original VC = " + vc);
    // logger.debug("sub VC = " + subvc);
    // Return the pruned context; an earlier unconditional "return subvc;" made this
    // statement unreachable and skipped the pruning entirely.
    return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF );
}
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {