Merge pull request #1248 from broadinstitute/rhl_keep_gt
LeftAlignAndTrimVariants --splitMultiallelics keeps GT if valid
This commit is contained in:
commit
0c3c775566
|
|
@ -136,9 +136,8 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "forHardLeftAlignVariantsTest.vcf --no_cmdline_in_header -split",
|
||||
1,
|
||||
Arrays.asList("534bea653d4a0e59e74f4107c1768558"));
|
||||
Arrays.asList("1158324223c312e4767fcefe9dde2fe1"));
|
||||
executeTest("test left alignment with hard multiple alleles", spec);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -146,8 +145,34 @@ public class LeftAlignAndTrimVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "forHardLeftAlignVariantsTest.vcf --dontTrimAlleles --no_cmdline_in_header -split",
|
||||
1,
|
||||
Arrays.asList("189b8136ee62b54bf7b227e99c892440"));
|
||||
Arrays.asList("923cb1d06e2d0d9a98cda8f8f637d108"));
|
||||
executeTest("test left alignment with hard multiple alleles, don't trim", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLeftAlignmentWithMultialleliecGenotypes() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split",
|
||||
1,
|
||||
Arrays.asList("f7be485b0cc7b8db75b7139f31c0708d"));
|
||||
executeTest("test left alignment of multiple alleles with genoptypes", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLeftAlignmentWithMultialleliecGenotypesHetNoRef() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt-het-noref.vcf --no_cmdline_in_header -split",
|
||||
1,
|
||||
Arrays.asList("cd686641ab7fe491a0acc7ff07535192"));
|
||||
executeTest("test left alignment of multiple alleles with genoptypes, including het-noref", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLeftAlignmentWithMultialleliecGenotypesKeepOriginalAC() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T LeftAlignAndTrimVariants -o %s -R " + b37KGReference + " --variant:vcf " + privateTestDir + "multiallele-gt.vcf --no_cmdline_in_header -split -keepOriginalAC",
|
||||
1,
|
||||
Arrays.asList("5da4ca9705fbb63446c1d317f7b6cae0"));
|
||||
executeTest("test left alignment of multiple alleles with genoptypes, keep original AC", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ import org.broadinstitute.gatk.engine.walkers.Window;
|
|||
import org.broadinstitute.gatk.engine.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.ChromosomeCountConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVCFHeaderLines;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.vcf.VCFHeader;
|
||||
import htsjdk.variant.vcf.VCFHeaderLine;
|
||||
|
|
@ -118,7 +121,7 @@ import java.util.*;
|
|||
* --splitMultiallelics
|
||||
* </pre>
|
||||
*
|
||||
* <h4>Split multiallelics into biallics, left align but don't trim alleles</h4>
|
||||
* <h4>Split multiallelics into biallics, left align but don't trim alleles, and store the original AC, AF, and AN values</h4>
|
||||
* <pre>
|
||||
* java -jar GenomeAnalysisTK.jar \
|
||||
* -T LeftAlignAndTrimVariants \
|
||||
|
|
@ -127,6 +130,7 @@ import java.util.*;
|
|||
* -o output.vcf \
|
||||
* --splitMultiallelics \
|
||||
* --dontTrimAlleles
|
||||
* --keepOriginalAC
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
|
|
@ -153,6 +157,14 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="splitMultiallelics", shortName="split", doc="Split multiallelic records and left-align individual alleles", required=false)
|
||||
protected boolean splitMultiallelics = false;
|
||||
|
||||
/**
|
||||
* When subsetting a callset, this tool recalculates the AC, AF, and AN values corresponding to the contents of the
|
||||
* subset. If this flag is enabled, the original values of those annotations will be stored in new annotations called
|
||||
* AC_Orig, AF_Orig, and AN_Orig.
|
||||
*/
|
||||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Store the original AC, AF, and AN values after subsetting", required=false)
|
||||
private boolean keepOriginalChrCounts = false;
|
||||
|
||||
|
||||
@Output(doc="File to which variants should be written")
|
||||
protected VariantContextWriter baseWriter = null;
|
||||
|
|
@ -165,11 +177,21 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
|||
private int referenceWindowStop;
|
||||
|
||||
public void initialize() {
|
||||
String trackName = variantCollection.variants.getName();
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
Map<String, VCFHeader> vcfHeaders = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
|
||||
final String trackName = variantCollection.variants.getName();
|
||||
final Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
final Map<String, VCFHeader> vcfHeaders = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
|
||||
|
||||
final Set<VCFHeaderLine> headerLines = new HashSet<>();
|
||||
headerLines.addAll(vcfHeaders.get(trackName).getMetaDataInSortedOrder()) ;
|
||||
if ( splitMultiallelics )
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
|
||||
|
||||
if (keepOriginalChrCounts) {
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AC_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AF_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AN_KEY));
|
||||
}
|
||||
|
||||
Set<VCFHeaderLine> headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder();
|
||||
baseWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
|
||||
writer = VariantContextWriterFactory.sortOnTheFly(baseWriter, MAX_INDEL_LENGTH);
|
||||
|
|
@ -187,7 +209,8 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
|||
for ( final VariantContext vc : VCs ) {
|
||||
// split first into biallelics, and optionally don't trim alleles to minimal representation
|
||||
if (splitMultiallelics) {
|
||||
final List<VariantContext> vcList = GATKVariantContextUtils.splitVariantContextToBiallelics(vc);
|
||||
final List<VariantContext> vcList = GATKVariantContextUtils.splitVariantContextToBiallelics(vc, false,
|
||||
GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, keepOriginalChrCounts);
|
||||
for (final VariantContext biallelicVC: vcList) {
|
||||
changedSites += trimAlignWrite(biallelicVC, ref, vcList.size());
|
||||
}
|
||||
|
|
@ -237,10 +260,6 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
|||
// align the VC
|
||||
final Pair<VariantContext,Integer> result = alignAndWrite(v, ref);
|
||||
|
||||
// strip out PLs and AD if we've subsetted the alleles
|
||||
if ( numBiallelics > 1 )
|
||||
result.first = new VariantContextBuilder(result.first).genotypes(GATKVariantContextUtils.stripPLsAndAD(result.first.getGenotypes())).make();
|
||||
|
||||
// write out new VC
|
||||
writer.add(result.first);
|
||||
|
||||
|
|
@ -301,7 +320,6 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
|||
if ( !newCigar.equals(originalCigar) && newCigar.numCigarElements() > 1 ) {
|
||||
int difference = originalIndex - newCigar.getCigarElement(0).getLength();
|
||||
VariantContext newVC = new VariantContextBuilder(vc).start(vc.getStart()-difference).stop(vc.getEnd()-difference).make();
|
||||
//System.out.println("Moving record from " + vc.getChr()+":"+vc.getStart() + " to " + vc.getChr()+":"+(vc.getStart()-difference));
|
||||
|
||||
final int indelIndex = originalIndex-difference;
|
||||
final byte[] newBases = new byte[indelLength + 1];
|
||||
|
|
@ -323,7 +341,7 @@ public class LeftAlignAndTrimVariants extends RodWalker<Integer, Integer> {
|
|||
* @param ref Ref bases
|
||||
* @param indexOfRef Index in ref where to create indel
|
||||
* @param indelLength Indel length
|
||||
* @return
|
||||
* @return Haplotype, containing the reference and the indel
|
||||
*/
|
||||
@Requires({"vc != null","ref != null", "indexOfRef +indelLength < ref.length", "vc.getNAlleles() == 2"})
|
||||
@Ensures("result != null")
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ import htsjdk.tribble.TribbleException;
|
|||
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
|
|
@ -558,6 +560,12 @@ public class GATKVariantContextUtils {
|
|||
*/
|
||||
SET_TO_NO_CALL,
|
||||
|
||||
/**
|
||||
* set all of the genotype GT values to NO_CALL and remove annotations
|
||||
*/
|
||||
SET_TO_NO_CALL_NO_ANNOTATIONS,
|
||||
|
||||
|
||||
/**
|
||||
* Use the subsetted PLs to greedily assigned genotypes
|
||||
*/
|
||||
|
|
@ -589,7 +597,7 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
|
||||
* Subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
|
||||
*
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
|
||||
|
|
@ -601,6 +609,7 @@ public class GATKVariantContextUtils {
|
|||
final GenotypeAssignmentMethod assignGenotypes) {
|
||||
if ( vc == null ) throw new IllegalArgumentException("the VariantContext cannot be null");
|
||||
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
||||
if ( allelesToUse.isEmpty() ) throw new IllegalArgumentException("must have alleles to use");
|
||||
if ( allelesToUse.get(0).isNonReference() ) throw new IllegalArgumentException("First allele must be the reference allele");
|
||||
if ( allelesToUse.size() == 1 ) throw new IllegalArgumentException("Cannot subset to only 1 alt allele");
|
||||
|
||||
|
|
@ -896,6 +905,10 @@ public class GATKVariantContextUtils {
|
|||
final GenotypeAssignmentMethod assignmentMethod,
|
||||
final double[] newLikelihoods,
|
||||
final List<Allele> allelesToUse) {
|
||||
if ( originalGT == null ) throw new IllegalArgumentException("originalGT cannot be null");
|
||||
if ( gb == null ) throw new IllegalArgumentException("gb cannot be null");
|
||||
if ( allelesToUse.isEmpty() || allelesToUse == null ) throw new IllegalArgumentException("allelesToUse cannot be empty or null");
|
||||
|
||||
switch ( assignmentMethod ) {
|
||||
case DO_NOT_ASSIGN_GENOTYPES:
|
||||
break;
|
||||
|
|
@ -903,6 +916,13 @@ public class GATKVariantContextUtils {
|
|||
gb.alleles(NO_CALL_ALLELES);
|
||||
gb.noGQ();
|
||||
break;
|
||||
case SET_TO_NO_CALL_NO_ANNOTATIONS:
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
gb.noGQ();
|
||||
gb.noAD();
|
||||
gb.noPL();
|
||||
gb.noAttributes();
|
||||
break;
|
||||
case USE_PLS_TO_ASSIGN:
|
||||
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
|
|
@ -918,12 +938,10 @@ public class GATKVariantContextUtils {
|
|||
break;
|
||||
case BEST_MATCH_TO_ORIGINAL:
|
||||
final List<Allele> best = new LinkedList<>();
|
||||
final Allele ref = allelesToUse.get(0); // WARNING -- should be checked in input argument
|
||||
final Allele ref = allelesToUse.get(0);
|
||||
for ( final Allele originalAllele : originalGT ) {
|
||||
best.add(allelesToUse.contains(originalAllele) ? originalAllele : ref);
|
||||
}
|
||||
gb.noGQ();
|
||||
gb.noPL();
|
||||
gb.alleles(best);
|
||||
break;
|
||||
}
|
||||
|
|
@ -970,7 +988,7 @@ public class GATKVariantContextUtils {
|
|||
/**
|
||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
||||
*
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @return genotypes context
|
||||
*/
|
||||
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
||||
|
|
@ -999,7 +1017,6 @@ public class GATKVariantContextUtils {
|
|||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
|
|
@ -1008,21 +1025,63 @@ public class GATKVariantContextUtils {
|
|||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome
|
||||
* @param genotypeAssignmentMethod assignment strategy for the (subsetted) PLs
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod) {
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod){
|
||||
return splitVariantContextToBiallelics(vc, trimLeft,genotypeAssignmentMethod, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @param trimLeft if true, we will also left trim alleles, potentially moving the resulting vcs forward on the genome
|
||||
* @param genotypeAssignmentMethod assignment strategy for the (subsetted) PLs
|
||||
* @param keepOriginalChrCounts keep the orignal chromosome counts before subsetting
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc, final boolean trimLeft, final GenotypeAssignmentMethod genotypeAssignmentMethod,
|
||||
final boolean keepOriginalChrCounts) {
|
||||
if ( vc == null ) throw new IllegalArgumentException("vc cannot be null");
|
||||
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
else {
|
||||
final List<VariantContext> biallelics = new LinkedList<>();
|
||||
|
||||
// if any of the genotypes ar ehet-not-ref (i.e. 1/2), set all of them to no-call
|
||||
final GenotypeAssignmentMethod genotypeAssignmentMethodUsed = hasHetNonRef(vc.getGenotypes()) ? GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS : genotypeAssignmentMethod;
|
||||
|
||||
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
|
||||
// make biallelic alleles
|
||||
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, genotypeAssignmentMethod));
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
|
||||
// since the VC has been subset, remove the invalid attributes
|
||||
for ( final String key : vc.getAttributes().keySet() ) {
|
||||
if ( !(key.equals(VCFConstants.ALLELE_COUNT_KEY) || key.equals(VCFConstants.ALLELE_FREQUENCY_KEY) || key.equals(VCFConstants.ALLELE_NUMBER_KEY)) ||
|
||||
genotypeAssignmentMethodUsed == GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS ) {
|
||||
builder.rmAttribute(key);
|
||||
}
|
||||
}
|
||||
|
||||
// subset INFO field annotations if available if genotype is called
|
||||
if ( genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS &&
|
||||
genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL )
|
||||
addInfoFiledAnnotations(vc, builder, alt, keepOriginalChrCounts);
|
||||
|
||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, genotypeAssignmentMethodUsed));
|
||||
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
|
||||
biallelics.add(trimmed);
|
||||
}
|
||||
|
|
@ -1031,6 +1090,21 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if any of the genotypes is heterozygous, non-reference (i.e. 1/2)
|
||||
*
|
||||
* @param genotypesContext genotype information
|
||||
* @return true if any of the genotypes are heterozygous, non-reference, false otherwise
|
||||
*/
|
||||
private static boolean hasHetNonRef(final GenotypesContext genotypesContext ){
|
||||
for (final Genotype gt : genotypesContext ) {
|
||||
if (gt.isHetNonRef())
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public static Genotype removePLsAndAD(final Genotype g) {
|
||||
return ( g.hasLikelihoods() || g.hasAD() ) ? new GenotypeBuilder(g).noPL().noAD().make() : g;
|
||||
}
|
||||
|
|
@ -1336,7 +1410,7 @@ public class GATKVariantContextUtils {
|
|||
*
|
||||
* @param originalGs the original GenotypesContext
|
||||
* @param originalVC the original VariantContext
|
||||
* @param allelesToUse the new (sub)set of alleles to use
|
||||
* @param allelesToUse the new subset of alleles to use
|
||||
* @return a new non-null GenotypesContext
|
||||
*/
|
||||
static private GenotypesContext fixDiploidGenotypesFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||
|
|
@ -2121,4 +2195,91 @@ public class GATKVariantContextUtils {
|
|||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the VCF INFO field annotations for the used alleles
|
||||
*
|
||||
* @param vc original variant context
|
||||
* @param builder variant context builder with subset of original variant context's alleles
|
||||
* @param altAllele alternate allele
|
||||
* @param keepOriginalChrCounts keep the orignal chromosome counts before subsetting
|
||||
* @return variant context builder with updated INFO field attribute values
|
||||
*/
|
||||
private static void addInfoFiledAnnotations(final VariantContext vc, final VariantContextBuilder builder, final Allele altAllele,
|
||||
final boolean keepOriginalChrCounts){
|
||||
|
||||
if (vc == null) throw new IllegalArgumentException("the variant context cannot be null");
|
||||
if (builder == null) throw new IllegalArgumentException("the variant context builder cannot be null");
|
||||
if (builder.getAlleles() == null) throw new IllegalArgumentException("the variant context builder alleles cannot be null");
|
||||
|
||||
final List<Allele> alleles = builder.getAlleles();
|
||||
if (alleles.size() < 2) throw new IllegalArgumentException("the variant context builder must contain at least 2 alleles");
|
||||
|
||||
// don't have to subset, the original vc has the same number and hence, the same alleles
|
||||
boolean keepOriginal = ( vc.getAlleles().size() == builder.getAlleles().size() );
|
||||
|
||||
if ( keepOriginalChrCounts ) {
|
||||
if (vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY))
|
||||
builder.attribute(GATKVCFConstants.ORIGINAL_AC_KEY, keepOriginal ?
|
||||
vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY) : getAltAlleleInfoFieldValue(VCFConstants.ALLELE_COUNT_KEY, vc, altAllele));
|
||||
if (vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY))
|
||||
builder.attribute(GATKVCFConstants.ORIGINAL_AF_KEY, keepOriginal ?
|
||||
vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) : getAltAlleleInfoFieldValue(VCFConstants.ALLELE_FREQUENCY_KEY, vc, altAllele));
|
||||
if (vc.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY)) {
|
||||
builder.attribute(GATKVCFConstants.ORIGINAL_AN_KEY, vc.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
|
||||
}
|
||||
}
|
||||
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the alternate allele INFO field value
|
||||
*
|
||||
* @param infoFieldName VCF INFO field name
|
||||
* @param vc variant context
|
||||
* @param altAllele the alternate allele
|
||||
* @return alternate allele INFO field value
|
||||
* @throws ReviewedGATKException if the alternate allele is part of the variant context
|
||||
*/
|
||||
private static Object getAltAlleleInfoFieldValue(final String infoFieldName, final VariantContext vc, final Allele altAllele) {
|
||||
|
||||
//final String[] splitOriginalField = vc.getAttribute(infoFieldName).toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
|
||||
final Object[] splitOriginalField = getVAttributeValues(vc.getAttribute(infoFieldName));
|
||||
|
||||
// subset the field
|
||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(vc, Arrays.asList(altAllele));
|
||||
|
||||
// skip the first allele, which is the reference
|
||||
for (int i = 1; i < alleleIndexesToUse.length; i++) {
|
||||
if (alleleIndexesToUse[i] == true)
|
||||
return splitOriginalField[i-1];
|
||||
}
|
||||
|
||||
throw new ReviewedGATKException("Alternate allele " + altAllele.toString() + " not in Variant Context " + vc.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Pulls out the appropriate values for the INFO field attribute
|
||||
*
|
||||
* @param attribute INFO field attribute
|
||||
* @return tokenized attribute values
|
||||
*/
|
||||
private static Object[] getVAttributeValues(final Object attribute) {
|
||||
|
||||
if (attribute == null) throw new IllegalArgumentException("the attribute cannot be null");
|
||||
|
||||
// break the original attributes into separate tokens
|
||||
final Object[] tokens;
|
||||
if ( attribute.getClass().isArray() )
|
||||
tokens = (Object[])attribute;
|
||||
else if ( List.class.isAssignableFrom(attribute.getClass()) )
|
||||
tokens = ((List)attribute).toArray();
|
||||
else
|
||||
tokens = attribute.toString().split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR);
|
||||
|
||||
return tokens;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue