From 0d97394c4ffe91a46f47529736cc87b271ca4d89 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 25 Oct 2010 20:03:03 +0000 Subject: [PATCH] Add capability to liftover to do the right thing when sections of the genome are reverse complemented. This does not work for indels (we don't try to reverse complement) because we need to figure out what the hell to do about the fact that the 'base to the left' that we automatically add on will be wrong because the location of the indel actually changes when reverse complemented. Sheesh. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4569 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 35 +++++++++++++++++++ .../variantutils/LiftoverVariants.java | 6 ++++ 2 files changed, 41 insertions(+) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index ea220d926..d32c840db 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -675,6 +675,41 @@ public class VariantContextUtils { return new VariantContext(vc.getName(), loc.getContig(), loc.getStart(), loc.getStop(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); } + /** + * Returns a context identical to this with the REF and ALT alleles reverse complemented. + * + * @param vc variant context + * @return new vc + */ + public static VariantContext reverseComplement(VariantContext vc) { + // create a mapping from original allele to reverse complemented allele + HashMap alleleMap = new HashMap(vc.getAlleles().size()); + for ( Allele originalAllele : vc.getAlleles() ) { + Allele newAllele; + if ( originalAllele.isNoCall() || originalAllele.isNull() ) + newAllele = originalAllele; + else + newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference()); + alleleMap.put(originalAllele, newAllele); + } + + // create new Genotype objects + Map newGenotypes = new HashMap(vc.getNSamples()); + for ( Map.Entry genotype : vc.getGenotypes().entrySet() ) { + List newAlleles = new ArrayList(); + for ( Allele allele : genotype.getValue().getAlleles() ) { + Allele newAllele = alleleMap.get(allele); + if ( newAllele == null ) + newAllele = Allele.NO_CALL; + newAlleles.add(newAllele); + } + newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles)); + } + + return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); + + } + public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set allowedAttributes) { if ( allowedAttributes == null ) return vc; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index bf0084fd0..b78001994 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -88,6 +88,12 @@ public class LiftoverVariants extends RodWalker { final Interval toInterval = liftOver.liftOver(fromInterval); if ( toInterval != null ) { + // check whether the strand flips, and if so reverse complement everything + // TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size) + if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && (vc.isSNP() || !vc.isVariant()) ) { + vc = VariantContextUtils.reverseComplement(vc); + } + vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length)); writer.add(vc, ref.getBase()); successfulIntervals++;