Merge pull request #703 from broadinstitute/eb_keep_original_ac_for_multiallelics

Update the --keepOriginalAC functionality in SelectVariants to work for ...
This commit is contained in:
Eric Banks 2014-08-15 17:36:32 -04:00
commit cab02f8401
2 changed files with 64 additions and 9 deletions

View File

@ -277,7 +277,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants --keepOriginalAC -env -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("e9b8292212545684cdb163423329ee7e")
Arrays.asList("4695c99d96490ed4e5b1568c5b52dea6")
);
executeTest("testKeepOriginalACAndENV--" + testFile, spec);

View File

@ -701,7 +701,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
builder.genotypes(newGC);
addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size());
addAnnotations(builder, vc, sub.getSampleNames());
return builder.make();
}
@ -710,17 +710,42 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
* Add annotations to the new VC
*
* @param builder the new VC to annotate
* @param originalVC the original -- but post-selection -- VC
* @param lostAllelesInSelection true if the original (pre-selection) VC has more alleles than the new one
* @param originalVC the original VC
* @param selectedSampleNames the post-selection list of sample names
*/
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final boolean lostAllelesInSelection) {
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final Set<String> selectedSampleNames) {
if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data
if ( KEEP_ORIGINAL_CHR_COUNTS && !lostAllelesInSelection ) {
if ( KEEP_ORIGINAL_CHR_COUNTS ) {
final int[] indexOfOriginalAlleleForNewAllele;
final List<Allele> newAlleles = builder.getAlleles();
final int numOriginalAlleles = originalVC.getNAlleles();
// if the alleles already match up, we can just copy the previous list of counts
if ( numOriginalAlleles == newAlleles.size() ) {
indexOfOriginalAlleleForNewAllele = null;
}
// otherwise we need to parse them and select out the correct ones
else {
indexOfOriginalAlleleForNewAllele = new int[newAlleles.size() - 1];
Arrays.fill(indexOfOriginalAlleleForNewAllele, -1);
// note that we don't care about the reference allele at position 0
for ( int newI = 1; newI < newAlleles.size(); newI++ ) {
final Allele newAlt = newAlleles.get(newI);
for ( int oldI = 0; oldI < numOriginalAlleles - 1; oldI++ ) {
if ( newAlt.equals(originalVC.getAlternateAllele(oldI), false) ) {
indexOfOriginalAlleleForNewAllele[newI - 1] = oldI;
break;
}
}
}
}
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
builder.attribute("AC_Orig", originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
builder.attribute("AC_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele));
if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
builder.attribute("AF_Orig", originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
builder.attribute("AF_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele));
if ( originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
builder.attribute("AN_Orig", originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
}
@ -729,7 +754,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
boolean sawDP = false;
int depth = 0;
for (String sample : originalVC.getSampleNames()) {
for ( final String sample : selectedSampleNames ) {
Genotype g = originalVC.getGenotype(sample);
if ( ! g.isFiltered() ) {
@ -743,4 +768,34 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
if ( sawDP )
builder.attribute("DP", depth);
}
/**
* Pulls out the appropriate tokens from the old ordering of an attribute to the new ordering
*
* @param attribute the non-null attribute (from the INFO field)
* @param oldToNewIndexOrdering the mapping from new to old ordering
* @return non-null Object attribute
*/
private Object getReorderedAttributes(final Object attribute, final int[] oldToNewIndexOrdering) {
// if the ordering is the same, then just use the original attribute
if ( oldToNewIndexOrdering == null )
return attribute;
// break the original attributes into separate tokens; unfortunately, this means being smart about class types
final Object[] tokens;
if ( attribute.getClass().isArray() )
tokens = (Object[])attribute;
else if ( List.class.isAssignableFrom(attribute.getClass()) )
tokens = ((List)attribute).toArray();
else
tokens = attribute.toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final List<Object> result = new ArrayList<>();
for ( final int index : oldToNewIndexOrdering ) {
if ( index >= tokens.length )
throw new IllegalArgumentException("the old attribute has an incorrect number of elements: " + attribute);
result.add(tokens[index]);
}
return result;
}
}