Merge pull request #703 from broadinstitute/eb_keep_original_ac_for_multiallelics

Update the --keepOriginalAC functionality in SelectVariants to work for ...
This commit is contained in:
Eric Banks 2014-08-15 17:36:32 -04:00
commit cab02f8401
2 changed files with 64 additions and 9 deletions

View File

@ -277,7 +277,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants --keepOriginalAC -env -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", "-T SelectVariants --keepOriginalAC -env -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
1, 1,
Arrays.asList("e9b8292212545684cdb163423329ee7e") Arrays.asList("4695c99d96490ed4e5b1568c5b52dea6")
); );
executeTest("testKeepOriginalACAndENV--" + testFile, spec); executeTest("testKeepOriginalACAndENV--" + testFile, spec);

View File

@ -701,7 +701,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
builder.genotypes(newGC); builder.genotypes(newGC);
addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size()); addAnnotations(builder, vc, sub.getSampleNames());
return builder.make(); return builder.make();
} }
@ -710,17 +710,42 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
* Add annotations to the new VC * Add annotations to the new VC
* *
* @param builder the new VC to annotate * @param builder the new VC to annotate
* @param originalVC the original -- but post-selection -- VC * @param originalVC the original VC
* @param lostAllelesInSelection true if the original (pre-selection) VC has more alleles than the new one * @param selectedSampleNames the post-selection list of sample names
*/ */
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final boolean lostAllelesInSelection) { private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final Set<String> selectedSampleNames) {
if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data
if ( KEEP_ORIGINAL_CHR_COUNTS && !lostAllelesInSelection ) { if ( KEEP_ORIGINAL_CHR_COUNTS ) {
final int[] indexOfOriginalAlleleForNewAllele;
final List<Allele> newAlleles = builder.getAlleles();
final int numOriginalAlleles = originalVC.getNAlleles();
// if the alleles already match up, we can just copy the previous list of counts
if ( numOriginalAlleles == newAlleles.size() ) {
indexOfOriginalAlleleForNewAllele = null;
}
// otherwise we need to parse them and select out the correct ones
else {
indexOfOriginalAlleleForNewAllele = new int[newAlleles.size() - 1];
Arrays.fill(indexOfOriginalAlleleForNewAllele, -1);
// note that we don't care about the reference allele at position 0
for ( int newI = 1; newI < newAlleles.size(); newI++ ) {
final Allele newAlt = newAlleles.get(newI);
for ( int oldI = 0; oldI < numOriginalAlleles - 1; oldI++ ) {
if ( newAlt.equals(originalVC.getAlternateAllele(oldI), false) ) {
indexOfOriginalAlleleForNewAllele[newI - 1] = oldI;
break;
}
}
}
}
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
builder.attribute("AC_Orig", originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY)); builder.attribute("AC_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele));
if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
builder.attribute("AF_Orig", originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)); builder.attribute("AF_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele));
if ( originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) if ( originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
builder.attribute("AN_Orig", originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY)); builder.attribute("AN_Orig", originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
} }
@ -729,7 +754,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
boolean sawDP = false; boolean sawDP = false;
int depth = 0; int depth = 0;
for (String sample : originalVC.getSampleNames()) { for ( final String sample : selectedSampleNames ) {
Genotype g = originalVC.getGenotype(sample); Genotype g = originalVC.getGenotype(sample);
if ( ! g.isFiltered() ) { if ( ! g.isFiltered() ) {
@ -743,4 +768,34 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
if ( sawDP ) if ( sawDP )
builder.attribute("DP", depth); builder.attribute("DP", depth);
} }
/**
* Pulls out the appropriate tokens from the old ordering of an attribute to the new ordering
*
* @param attribute the non-null attribute (from the INFO field)
* @param oldToNewIndexOrdering the mapping from new to old ordering
* @return non-null Object attribute
*/
private Object getReorderedAttributes(final Object attribute, final int[] oldToNewIndexOrdering) {
// if the ordering is the same, then just use the original attribute
if ( oldToNewIndexOrdering == null )
return attribute;
// break the original attributes into separate tokens; unfortunately, this means being smart about class types
final Object[] tokens;
if ( attribute.getClass().isArray() )
tokens = (Object[])attribute;
else if ( List.class.isAssignableFrom(attribute.getClass()) )
tokens = ((List)attribute).toArray();
else
tokens = attribute.toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
final List<Object> result = new ArrayList<>();
for ( final int index : oldToNewIndexOrdering ) {
if ( index >= tokens.length )
throw new IllegalArgumentException("the old attribute has an incorrect number of elements: " + attribute);
result.add(tokens[index]);
}
return result;
}
} }