From 6f5a08ddc610b7a00556c776013727ac0c968e73 Mon Sep 17 00:00:00 2001 From: droazen Date: Wed, 22 Jun 2011 22:54:44 +0000 Subject: [PATCH] Simple walker to look at SNPs near indels. Didn't need to make this a walker and commit it, but used it as an opportunity to play with GIT in unstable. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6049 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/AssessSnpsNearIndels.java | 201 ++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 java/src/org/broadinstitute/sting/oneoffprojects/walkers/AssessSnpsNearIndels.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AssessSnpsNearIndels.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AssessSnpsNearIndels.java new file mode 100644 index 000000000..f3baf24f7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AssessSnpsNearIndels.java @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFWriter; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.iterators.DownsampleIterator; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.vcf.VCFUtils; + +import java.io.PrintStream; +import java.io.Writer; +import java.security.PrivilegedActionException; +import java.util.*; + +/** + * Assesses distance of SNP calls to nearest indel call in Exomes. + * Use -B:snps,vcf and -B:indels,vcf + */ +public class AssessSnpsNearIndels extends RodWalker { + + private class TiTv { + int TiCount = 0, TvCount = 0; + + public TiTv() {}; + } + + private static final int Bin0to5 = 0; + private static final int Bin6to10 = 1; + private static final int Bin11to15 = 2; + private static final int Bin16to20 = 3; + private static final int Bin21to25 = 4; + private static final int Bin26to30 = 5; + private static final int BinMoreThan30 = 6; + + private GenomeLoc previousIndel = null; + private ArrayList snpQueue = new ArrayList(); + private TiTv[] counts = new TiTv[7]; + private GenomeLocParser GLparser = null; + + @Output(doc="File to which results should be written",required=true) + protected PrintStream out = null; + + public void initialize() { + GLparser = getToolkit().getGenomeLocParser(); + + for (int i = 0; i < 7; i++) + counts[i] = new TiTv(); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) // RodWalkers can make funky map calls + return 0; + + VariantContext snp = tracker.getVariantContext(ref, "snps", null, ref.getLocus(), true); + VariantContext indel = tracker.getVariantContext(ref, "indels", null, ref.getLocus(), true); + + // first add the snp if available + if ( snp != null && !snp.isFiltered() ) { + + // flush the queue on a new contig + if ( !snpQueue.isEmpty() && !snpQueue.get(0).getChr().equals(snp.getChr()) ) { + for ( VariantContext vc : snpQueue ) + calculateDistance(vc, previousIndel, null); + snpQueue.clear(); + } + + snpQueue.add(snp); + } + + // then look for the indel + if ( indel != null && !indel.isFiltered() ) { + + GenomeLoc loc = GLparser.createGenomeLoc(indel.getChr(), indel.getStart(), indel.getEnd()); + boolean sameContig = !snpQueue.isEmpty() && snpQueue.get(0).getChr().equals(indel.getChr()); + + // flush the queue + for ( VariantContext vc : snpQueue ) + calculateDistance(vc, previousIndel, sameContig ? loc : null); + snpQueue.clear(); + + previousIndel = loc; + } + + return 1; + } + + private void calculateDistance(VariantContext snp, GenomeLoc previousIndel, GenomeLoc nextIndel) { + + GenomeLoc loc = GLparser.createGenomeLoc(snp.getChr(), snp.getStart(), snp.getEnd()); + + int previousDistance = -1, nextDistance = -1; + + if ( previousIndel != null ) { + // watch out for spanning deletions + if ( previousIndel.getStop() > snp.getStart() ) + previousDistance = 0; + else + previousDistance = snp.getStart() - previousIndel.getStop(); + } + + if ( nextIndel != null ) + nextDistance = nextIndel.getStart() - loc.getStart(); + + if ( previousDistance == -1 && nextDistance == -1 ) + return; + + int distance = -1; + if ( previousDistance == -1 ) + distance = nextDistance; + else if ( nextDistance == -1 ) + distance = previousDistance; + else + distance = Math.min(previousDistance, nextDistance); + + TiTv obj; + if ( distance < 0 ) + throw new IllegalStateException("Found a negative distance at " + loc); + else if ( distance < 6 ) + obj = counts[Bin0to5]; + else if ( distance < 11 ) + obj = counts[Bin6to10]; + else if ( distance < 16 ) + obj = counts[Bin11to15]; + else if ( distance < 21 ) + obj = counts[Bin16to20]; + else if ( distance < 26 ) + obj = counts[Bin21to25]; + else if ( distance < 31 ) + obj = counts[Bin26to30]; + else + obj = counts[BinMoreThan30]; + + if ( BaseUtils.isTransition(snp.getReference().getBases()[0], snp.getAlternateAllele(0).getBases()[0]) ) + obj.TiCount++; + else + obj.TvCount++; + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + public void onTraversalDone(Integer sum) { + // flush the queue + for ( VariantContext vc : snpQueue ) + calculateDistance(vc, previousIndel, null); + + out.println("Bin\tnumTi\tnumTv\tTi/Tv"); + printLine(counts[Bin0to5], "0to5"); + printLine(counts[Bin6to10], "6to10"); + printLine(counts[Bin11to15], "11to15"); + printLine(counts[Bin16to20], "16to20"); + printLine(counts[Bin21to25], "21to25"); + printLine(counts[Bin26to30], "26to30"); + printLine(counts[BinMoreThan30], ">30"); + } + + private void printLine(TiTv obj, String s) { + out.println(String.format("%s\t%d\t%d\t%.2f", s, obj.TiCount, obj.TvCount, ((double)obj.TiCount/(double)obj.TvCount))); + } +}