From 84d407ff3fef14b21eba7ea98714311898812434 Mon Sep 17 00:00:00 2001 From: depristo Date: Tue, 14 Jul 2009 18:53:27 +0000 Subject: [PATCH] Fixing odd merge problem with VariantEval -- better cluster analysis (no cumsum), rodVariant is now an AllelicVariant git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1239 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/rodVariants.java | 29 ++++++++++++++++++- .../varianteval/ClusterCounterAnalysis.java | 6 ++-- .../varianteval/VariantEvalWalker.java | 8 ----- python/Gelis2PopSNPs.py | 25 ++++++++-------- 4 files changed, 43 insertions(+), 25 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java index decc9a4b2..3c39f11ad 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java @@ -4,6 +4,8 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.IOException; +import java.util.List; +import java.util.Arrays; /* * Copyright (c) 2009 The Broad Institute @@ -30,7 +32,7 @@ import java.io.IOException; * OTHER DEALINGS IN THE SOFTWARE. */ -public class rodVariants extends BasicReferenceOrderedDatum { +public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVariant { private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } private GenomeLoc loc; @@ -140,4 +142,29 @@ public class rodVariants extends BasicReferenceOrderedDatum { this.lodBtr = (float) (bestLikelihood - refLikelihood); this.lodBtnb = (float) (bestLikelihood - nextBestLikelihood); } + + public String getRefBasesFWD() { + char[] b = { getReferenceBase() }; + return new String( b ); + } + + public char getRefSnpFWD() throws IllegalStateException { return 0; } + public String getAltBasesFWD() { return null; } + public char getAltSnpFWD() throws IllegalStateException { return 0; } + public boolean isReference() { return ! isSNP(); } + public boolean isSNP() { return getLodBtr() > 5; } + public boolean isInsertion() { return false; } + public boolean isDeletion() { return false; } + public boolean isIndel() { return false; } + public double getMAF() { return 0; } + public double getHeterozygosity() { return 0; } + public boolean isGenotype() { return true; } + public double getVariationConfidence() { return getLodBtr(); } + public double getConsensusConfidence() { return getLodBtnb(); } + public List getGenotype() throws IllegalStateException { + return Arrays.asList(getBestGenotype()); + } + + public int getPloidy() throws IllegalStateException { return 2; } + public boolean isBiallelic() { return true; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java index a1b904cbf..3b222b112 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ClusterCounterAnalysis.java @@ -74,13 +74,11 @@ public class ClusterCounterAnalysis extends BasicVariantAnalysis { public List done() { List s = new ArrayList(); s.add(String.format("snps_counted_for_neighbor_distances %d", nSeen)); - int cumulative = 0; - s.add(String.format("description maxDist count cumulative")); + s.add(String.format("description maxDist count")); for ( int i = 0; i < neighborWiseBoundries.length; i++ ) { int maxDist = neighborWiseBoundries[i]; int count = variantsWithClusters.get(i).size(); - cumulative += count; - s.add(String.format("snps_within_clusters_of_size %10d %10d %10d", maxDist, count, cumulative)); + s.add(String.format("snps_within_clusters_of_size %10d %10d", maxDist, count)); } return s; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 394ff6653..7ad78f212 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -133,14 +133,6 @@ public class VariantEvalWalker extends RefWalker { updateAnalysisSet(s, eval, tracker, ref, context); } - if ( eval instanceof SNPCallFromGenotypes ) { - SNPCallFromGenotypes call = (SNPCallFromGenotypes)eval; - int nVarGenotypes = call.nHetGenotypes() + call.nHomVarGenotypes(); - //System.out.printf("%d variant genotypes at %s%n", nVarGenotypes, calls); - final String s = nVarGenotypes == 1 ? SINGLETON_SNPS : TWOHIT_SNPS; - updateAnalysisSet(s, eval, tracker, ref, context); - } - return 1; } diff --git a/python/Gelis2PopSNPs.py b/python/Gelis2PopSNPs.py index b4da5b201..352ff6801 100755 --- a/python/Gelis2PopSNPs.py +++ b/python/Gelis2PopSNPs.py @@ -150,19 +150,20 @@ def main(): sortedCallFile = 'all.sorted.calls' cmd = ("~/dev/GenomeAnalysisTK/trunk/perl/sortByRef.pl -k 1 tmp.calls ~/work/humanref/Homo_sapiens_assembly18.fasta.fai > %s" % ( sortedCallFile ) ) jobid = farm_commands.cmd(cmd, OPTIONS.farmQueue, just_print_commands = OPTIONS.dry, waitID = jobid) - - sortedCalls = [line.split() for line in open(sortedCallFile)] - aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls) - - print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom' - for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ): - if snp == None: continue # ignore bad calls - #print snp - #sharedCalls = list(sharedCallsGroup) - #genotype = list(sharedCalls[0][5]) - print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes()) - outputFile.close() + if not OPTIONS.dry: + sortedCalls = [line.split() for line in open(sortedCallFile)] + aggregratedCalls = picard_utils.aggregateGeliCalls(sortedCalls) + + print >> outputFile, 'loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom' + + for snp in map( lambda x: picard_utils.aggregatedGeliCalls2SNP(x, nIndividuals), aggregratedCalls ): + if snp == None: continue # ignore bad calls + #print snp + #sharedCalls = list(sharedCallsGroup) + #genotype = list(sharedCalls[0][5]) + print >> outputFile, '%s %s %s %.6f -0.0 -0.0 0.000000 100.0 %d %d %d %d' % (snp.loc, snp.ref, snp.alt(), snp.q(), nIndividuals, snp.nRefGenotypes(), snp.nHetGenotypes(), snp.nHomVarGenotypes()) + outputFile.close() if __name__ == "__main__":