From b492192838bd37af4462b94160fd788eedd97253 Mon Sep 17 00:00:00 2001 From: depristo Date: Thu, 4 Jun 2009 00:11:29 +0000 Subject: [PATCH] Pairwise SNP distance metrics now enabled git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@892 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/TabularROD.java | 5 +- .../varianteval/VariantEvalWalker.java | 64 +++++++++++++++++-- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java index 48a844ab9..6de6928fd 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/TabularROD.java @@ -55,6 +55,9 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map { public boolean printVariants = false; int nBasesCovered = 0; + int nSNPs = 0; VariantDBCoverage dbSNPStats = new VariantDBCoverage("dbSNP"); - int N_TRANSITION_TRANVERSION_BINS = 20; + int N_TRANSITION_TRANVERSION_BINS = 100; Histogram transitions; Histogram transversions; + + @Argument(shortName="outputFilenameBase", doc="", required=false) + String outputFilenameBase = null; + + ArrayList pairWiseDistances; + int[] pairWiseBoundries = {1, 10, 100, 1000, 10000, 1000000000}; + AllelicVariant lastVariant = null; + //EnumMap transition_transversion_counts[]; public void initialize() { @@ -39,6 +51,7 @@ public class VariantEvalWalker extends RefWalker { //transversions = new Histogram(N_TRANSITION_TRANVERSION_BINS, Math.log10(1e-10), 0.0, 0); transitions = new Histogram(N_TRANSITION_TRANVERSION_BINS, 0.0, 1.0, 0); transversions = new Histogram(N_TRANSITION_TRANVERSION_BINS, 0.0, 1.0, 0); + pairWiseDistances = new ArrayList(); } //private int transition_transversion_bin(double het) { @@ -66,11 +79,20 @@ public class VariantEvalWalker extends RefWalker { matchFlag, eval); } + if ( eval != null ) { + //System.out.printf("GREP ME%n"); + if ( ! eval.isSNP() || eval.getVariationConfidence() < minDiscoveryQ ) + System.out.printf("We have a problem at %s: %b %f%n", eval, eval.isSNP(), eval.getVariationConfidence()); + } + if ( eval != null && eval.isSNP() && eval.getVariationConfidence() >= minDiscoveryQ ) { //System.out.printf("%s has: %nDBSNP: %s%nEVAL:%s%n", context.getLocation(), dbsnp, eval); + //System.out.printf("N snps %d = %s%n", ++nSNPs, eval.getLocation()); updateTransitionTransversion(eval, ref, context); + if (lastVariant != null) updatePairwiseDistances(eval, lastVariant); + lastVariant = eval; //updateHapMapRate(dbsnp, eval, ref, context); } updateVariantDBCoverage(dbsnp, eval, ref, context); @@ -78,6 +100,16 @@ public class VariantEvalWalker extends RefWalker { return 1; } + private void updatePairwiseDistances(AllelicVariant eval, AllelicVariant lastVariant) { + GenomeLoc eL = eval.getLocation(); + GenomeLoc lvL = lastVariant.getLocation(); + if (eL.getContigIndex() == lvL.getContigIndex()) { + long d = eL.distance(lvL); + pairWiseDistances.add(d); + out.printf("# Pairwise-distance %d %s %s%n", d, eL, lvL); + } + } + private void updateTransitionTransversion(AllelicVariant dbsnp, char ref, LocusContext context) { char refBase = dbsnp.getRefSnpFWD(); char altBase = dbsnp.getAltSnpFWD(); @@ -127,12 +159,10 @@ public class VariantEvalWalker extends RefWalker { double ratio = (float)sit/ver; s.append(String.format("%.2f %d %d %f%n", avHet, sit, ver, ratio)); } - long nSites = nTransitions + nTransversions; - out.printf("# n bases covered: %d%n", nBasesCovered); - out.printf("# sites: %d%n", nSites); - out.printf("# variant rate: %.5f confident variants per base%n", nSites / (1.0 * nBasesCovered)); - out.printf("# variant rate: 1 / %d confident variants per base%n", nBasesCovered / nSites); + out.printf("# sites: %d%n", nSNPs); + out.printf("# variant rate: %.5f confident variants per base%n", nSNPs / (1.0 * nBasesCovered)); + out.printf("# variant rate: 1 / %d confident variants per base%n", nBasesCovered / nSNPs); out.printf("# transitions: %d%n", nTransitions); out.printf("# transversions: %d%n", nTransversions); out.printf("# ratio: %.2f%n", nTransitions / (1.0 * nTransversions)); @@ -140,7 +170,27 @@ public class VariantEvalWalker extends RefWalker { // dbSNP stats out.println(dbSNPStats.toSingleLineString("#")); out.print(dbSNPStats.toMultiLineString("#")); - + + int[] pairCounts = new int[pairWiseBoundries.length]; + Arrays.fill(pairCounts, 0); + for ( long dist : pairWiseDistances ) { + boolean done = false; + for ( int i = 0; i < pairWiseBoundries.length && ! done ; i++ ) { + int maxDist = pairWiseBoundries[i]; + if ( dist < maxDist ) { + pairCounts[i]++; + done = true; + } + } + } + + out.printf("# snps counted for pairwise distance: %d%n", pairWiseDistances.size()); + for ( int i = 0; i < pairWiseBoundries.length; i++ ) { + int maxDist = pairWiseBoundries[i]; + int count = pairCounts[i]; + out.printf("# snps within %d bp: %d%n", maxDist, count); + } + out.print(s); } }