Add capability to liftover to do the right thing when sections of the genome are reverse complemented. This does not work for indels (we don't try to reverse complement) because we need to figure out what the hell to do about the fact that the 'base to the left' that we automatically add on will be wrong because the location of the indel actually changes when reverse complemented. Sheesh.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4569 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-10-25 20:03:03 +00:00
parent c357ec775a
commit 0d97394c4f
2 changed files with 41 additions and 0 deletions

View File

@ -675,6 +675,41 @@ public class VariantContextUtils {
return new VariantContext(vc.getName(), loc.getContig(), loc.getStart(), loc.getStop(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
}
/**
* Returns a context identical to this with the REF and ALT alleles reverse complemented.
*
* @param vc variant context
* @return new vc
*/
public static VariantContext reverseComplement(VariantContext vc) {
// create a mapping from original allele to reverse complemented allele
HashMap<Allele, Allele> alleleMap = new HashMap<Allele, Allele>(vc.getAlleles().size());
for ( Allele originalAllele : vc.getAlleles() ) {
Allele newAllele;
if ( originalAllele.isNoCall() || originalAllele.isNull() )
newAllele = originalAllele;
else
newAllele = Allele.create(BaseUtils.simpleReverseComplement(originalAllele.getBases()), originalAllele.isReference());
alleleMap.put(originalAllele, newAllele);
}
// create new Genotype objects
Map<String, Genotype> newGenotypes = new HashMap<String, Genotype>(vc.getNSamples());
for ( Map.Entry<String, Genotype> genotype : vc.getGenotypes().entrySet() ) {
List<Allele> newAlleles = new ArrayList<Allele>();
for ( Allele allele : genotype.getValue().getAlleles() ) {
Allele newAllele = alleleMap.get(allele);
if ( newAllele == null )
newAllele = Allele.NO_CALL;
newAlleles.add(newAllele);
}
newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles));
}
return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
}
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
if ( allowedAttributes == null )
return vc;

View File

@ -88,6 +88,12 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
final Interval toInterval = liftOver.liftOver(fromInterval);
if ( toInterval != null ) {
// check whether the strand flips, and if so reverse complement everything
// TODO -- make this work for indels (difficult because the 'previous base' context needed will be changing based on indel type/size)
if ( fromInterval.isPositiveStrand() != toInterval.isPositiveStrand() && (vc.isSNP() || !vc.isVariant()) ) {
vc = VariantContextUtils.reverseComplement(vc);
}
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
writer.add(vc, ref.getBase());
successfulIntervals++;