Merge pull request #703 from broadinstitute/eb_keep_original_ac_for_multiallelics
Update the --keepOriginalAC functionality in SelectVariants to work for ...
This commit is contained in:
commit
cab02f8401
|
|
@ -277,7 +277,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants --keepOriginalAC -env -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
|
"-T SelectVariants --keepOriginalAC -env -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("e9b8292212545684cdb163423329ee7e")
|
Arrays.asList("4695c99d96490ed4e5b1568c5b52dea6")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("testKeepOriginalACAndENV--" + testFile, spec);
|
executeTest("testKeepOriginalACAndENV--" + testFile, spec);
|
||||||
|
|
|
||||||
|
|
@ -701,7 +701,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
|
|
||||||
builder.genotypes(newGC);
|
builder.genotypes(newGC);
|
||||||
|
|
||||||
addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size());
|
addAnnotations(builder, vc, sub.getSampleNames());
|
||||||
|
|
||||||
return builder.make();
|
return builder.make();
|
||||||
}
|
}
|
||||||
|
|
@ -710,17 +710,42 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
* Add annotations to the new VC
|
* Add annotations to the new VC
|
||||||
*
|
*
|
||||||
* @param builder the new VC to annotate
|
* @param builder the new VC to annotate
|
||||||
* @param originalVC the original -- but post-selection -- VC
|
* @param originalVC the original VC
|
||||||
* @param lostAllelesInSelection true if the original (pre-selection) VC has more alleles than the new one
|
* @param selectedSampleNames the post-selection list of sample names
|
||||||
*/
|
*/
|
||||||
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final boolean lostAllelesInSelection) {
|
private void addAnnotations(final VariantContextBuilder builder, final VariantContext originalVC, final Set<String> selectedSampleNames) {
|
||||||
if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data
|
if ( fullyDecode ) return; // TODO -- annotations are broken with fully decoded data
|
||||||
|
|
||||||
if ( KEEP_ORIGINAL_CHR_COUNTS && !lostAllelesInSelection ) {
|
if ( KEEP_ORIGINAL_CHR_COUNTS ) {
|
||||||
|
final int[] indexOfOriginalAlleleForNewAllele;
|
||||||
|
final List<Allele> newAlleles = builder.getAlleles();
|
||||||
|
final int numOriginalAlleles = originalVC.getNAlleles();
|
||||||
|
|
||||||
|
// if the alleles already match up, we can just copy the previous list of counts
|
||||||
|
if ( numOriginalAlleles == newAlleles.size() ) {
|
||||||
|
indexOfOriginalAlleleForNewAllele = null;
|
||||||
|
}
|
||||||
|
// otherwise we need to parse them and select out the correct ones
|
||||||
|
else {
|
||||||
|
indexOfOriginalAlleleForNewAllele = new int[newAlleles.size() - 1];
|
||||||
|
Arrays.fill(indexOfOriginalAlleleForNewAllele, -1);
|
||||||
|
|
||||||
|
// note that we don't care about the reference allele at position 0
|
||||||
|
for ( int newI = 1; newI < newAlleles.size(); newI++ ) {
|
||||||
|
final Allele newAlt = newAlleles.get(newI);
|
||||||
|
for ( int oldI = 0; oldI < numOriginalAlleles - 1; oldI++ ) {
|
||||||
|
if ( newAlt.equals(originalVC.getAlternateAllele(oldI), false) ) {
|
||||||
|
indexOfOriginalAlleleForNewAllele[newI - 1] = oldI;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
if ( originalVC.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||||
builder.attribute("AC_Orig", originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
builder.attribute("AC_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_COUNT_KEY), indexOfOriginalAlleleForNewAllele));
|
||||||
if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
|
if ( originalVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
|
||||||
builder.attribute("AF_Orig", originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
|
builder.attribute("AF_Orig", getReorderedAttributes(originalVC.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), indexOfOriginalAlleleForNewAllele));
|
||||||
if ( originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
|
if ( originalVC.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
|
||||||
builder.attribute("AN_Orig", originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
|
builder.attribute("AN_Orig", originalVC.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
|
||||||
}
|
}
|
||||||
|
|
@ -729,7 +754,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
|
|
||||||
boolean sawDP = false;
|
boolean sawDP = false;
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
for (String sample : originalVC.getSampleNames()) {
|
for ( final String sample : selectedSampleNames ) {
|
||||||
Genotype g = originalVC.getGenotype(sample);
|
Genotype g = originalVC.getGenotype(sample);
|
||||||
|
|
||||||
if ( ! g.isFiltered() ) {
|
if ( ! g.isFiltered() ) {
|
||||||
|
|
@ -743,4 +768,34 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
if ( sawDP )
|
if ( sawDP )
|
||||||
builder.attribute("DP", depth);
|
builder.attribute("DP", depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pulls out the appropriate tokens from the old ordering of an attribute to the new ordering
|
||||||
|
*
|
||||||
|
* @param attribute the non-null attribute (from the INFO field)
|
||||||
|
* @param oldToNewIndexOrdering the mapping from new to old ordering
|
||||||
|
* @return non-null Object attribute
|
||||||
|
*/
|
||||||
|
private Object getReorderedAttributes(final Object attribute, final int[] oldToNewIndexOrdering) {
|
||||||
|
// if the ordering is the same, then just use the original attribute
|
||||||
|
if ( oldToNewIndexOrdering == null )
|
||||||
|
return attribute;
|
||||||
|
|
||||||
|
// break the original attributes into separate tokens; unfortunately, this means being smart about class types
|
||||||
|
final Object[] tokens;
|
||||||
|
if ( attribute.getClass().isArray() )
|
||||||
|
tokens = (Object[])attribute;
|
||||||
|
else if ( List.class.isAssignableFrom(attribute.getClass()) )
|
||||||
|
tokens = ((List)attribute).toArray();
|
||||||
|
else
|
||||||
|
tokens = attribute.toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
|
||||||
|
|
||||||
|
final List<Object> result = new ArrayList<>();
|
||||||
|
for ( final int index : oldToNewIndexOrdering ) {
|
||||||
|
if ( index >= tokens.length )
|
||||||
|
throw new IllegalArgumentException("the old attribute has an incorrect number of elements: " + attribute);
|
||||||
|
result.add(tokens[index]);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue