From af8c47fc2f2f0f51f1ab2b19cd6733226a2d19ad Mon Sep 17 00:00:00 2001 From: depristo Date: Fri, 5 Feb 2010 15:42:54 +0000 Subject: [PATCH] Fixing up testVariantContext for integration tests for variant context. Printing of VCs and genotypes now stable using sorting. Cleaned up comments in quality score by strand. RefMetaDataTracker now directly allows walkers to obtain VariantContexts using the simple Collection getAllVariantContexts(GenomeLoc curLocation, EnumSet allowedTypes, boolean requireStartHere, boolean takeFirstOnly) function. VCF and dbSNP VariantContexts now officially supported. Other important types can be added to the adaptor system in the refdata package. Integration tests later today git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2791 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/contexts/variantcontext/Allele.java | 11 +++- .../contexts/variantcontext/Genotype.java | 2 +- .../variantcontext/VariantContext.java | 6 +- .../gatk/refdata/RefMetaDataTracker.java | 37 +++++++++++ .../walkers/QualityScoreByStrandWalker.java | 7 -- .../walkers/TestVariantContextWalker.java | 64 ++++++++----------- .../varianteval2/VariantEval2Walker.java | 14 ++-- .../org/broadinstitute/sting/utils/Utils.java | 28 ++++++++ 8 files changed, 112 insertions(+), 57 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java index 550ba9624..0d48a4105 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java @@ -75,7 +75,7 @@ import java.util.Collection; * @author ebanks, depristo */ -public class Allele { +public class Allele implements Comparable { private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; private boolean isRef = false; @@ -302,4 +302,13 @@ public class Allele { return myAlleles; } + + public int
compareTo(Allele other) { + if ( isReference() && other.isNonReference() ) + return -1; + else if ( isNonReference() && other.isReference() ) + return 1; + else + return new String(getBases()).compareTo(new String(other.getBases())); // todo -- potential performance issue + } } diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java index 327d9767d..ea1200bfe 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java @@ -120,7 +120,7 @@ public class Genotype { } public String toString() { - return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getAlleles(), getType(), 10 * getNegLog10PError(), getAttributes()); + return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getAlleles(), getType(), 10 * getNegLog10PError(), Utils.sortedString(getAttributes())); } public String toBriefString() { diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java index 0a868f316..3203a68ce 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import java.util.*; @@ -830,8 +831,9 @@ public class VariantContext { } public String toString() { - return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s", - getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values()); + 
return String.format("[VC %s @ %s of type=%s alleles=%s attr=%s GT=%s", + getName(), getLocation(), this.getType(), + Utils.sorted(this.getAlleles()), Utils.sortedString(this.getAttributes()), Utils.sorted(this.getGenotypes())); } // protected basic manipulation routines diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 9ca54741a..570ecf79f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.GenomeLoc; import java.util.*; @@ -193,6 +195,41 @@ public class RefMetaDataTracker { return bound; } + + + public Collection getAllVariantContexts(GenomeLoc curLocation) { + return getAllVariantContexts(curLocation, null, false, false); + } + + public Collection getAllVariantContexts(GenomeLoc curLocation, EnumSet allowedTypes, boolean requireStartHere, boolean takeFirstOnly ) { + List contexts = new ArrayList(); + + for ( RODRecordList rodList : getBoundRodTracks() ) { + for ( ReferenceOrderedDatum rec : rodList.getRecords() ) { + if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec) ) { + // ok, we might actually be able to turn this record in a variant context + VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec); + + // now, let's decide if we want to keep it + boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType()); + boolean goodPos = ! 
requireStartHere || rec.getLocation().getStart() == curLocation.getStart(); + + if ( goodType && goodPos ) { // ok, we are going to keep this thing + contexts.add(vc); + + if ( takeFirstOnly ) + // we only want the first passing instance, so break the loop over records in rodList + break; + } + } + } + } + + return contexts; + } + + + /** * Binds the list of reference ordered data records (RODs) to track name at this site. Should be used only by the traversal * system to provide access to RODs in a structured way to the walkers. diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java index 8ecd7b5b6..ab930afd1 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/QualityScoreByStrandWalker.java @@ -21,13 +21,6 @@ import java.util.HashMap; import java.io.PrintWriter; /** - * Created by IntelliJ IDEA. - * User: Ghost - * Date: Dec 15, 2009 - * Time: 11:56:22 AM - * To change this template use File | Settings | File Templates. - */ -/* * This walker prints out quality score counts for forward and reverse stranded reads aggregated over all loci * in the interval. Furthermore, it prints out quality score counts at a particular offset of forward and reverse * reads, aggregated across all paired-end reads in the interval. 
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java index 87b792fd3..2d2d87fdb 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java @@ -2,56 +2,48 @@ package org.broadinstitute.sting.oneoffprojects.walkers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.cmdLine.Argument; + +import java.util.EnumSet; /** * Test routine for new VariantContext object */ public class TestVariantContextWalker extends RodWalker { + @Argument(fullName="takeFirstOnly", doc="Only take the first second at a locus, as opposed to all", required=false) + boolean takeFirstOnly = false; + + @Argument(fullName="onlyContextsOfType", doc="Only take variant contexts of this type", required=false) + VariantContext.Type onlyOfThisType = null; + + @Argument(fullName="onlyContextsStartinAtCurrentPosition", doc="Only take variant contexts at actually start at the current position, excluding those at span to the current location but start earlier", required=false) + boolean onlyContextsStartinAtCurrentPosition = false; + + @Argument(fullName="printPerLocus", doc="If true, we'll print the variant contexts, in addition to counts", required=false) + boolean printContexts = false; public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( ref == null ) return 0; else { - // todo -- this should just invoke the new RefMetaDataConverter, and print out all of the 
info -// RODRecordList dbsnpList = tracker.getTrackData("dbsnp", null); -// -// if (dbsnpList != null) { -// // do dbSNP conversion -// int n = 0; -// for (ReferenceOrderedDatum d : dbsnpList) { -// rodDbSNP dbsnpRecord = (rodDbSNP)d; -// if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) { -// VariantContext vc = VariantContextAdaptors.convertToVariantContext("dbsnp", dbsnpRecord); -// if ( vc != null ) { -// n++; -// System.out.printf("%s%n", vc); -// } -// } -// } -// -// return n; -// } -// -// RODRecordList vcfList = tracker.getTrackData("vcf", null); -// if (vcfList != null) { -// // do vcf conversion -// int n = 0; -// for (ReferenceOrderedDatum d : vcfList) { -// RodVCF vcfRecord = (RodVCF)d; -// VariantContext vc = VariantContextAdaptors.convertToVariantContext("vcf", vcfRecord); -// if ( vc != null ) { -// n++; -// System.out.printf("%s%n", vc); -// } -// } -// -// return n; -// } + EnumSet allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType); - return 0; + int n = 0; + for (VariantContext vc : tracker.getAllVariantContexts(context.getLocation(), allowedTypes, onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) { + n++; + if ( printContexts ) out.printf(" %s%n", vc); + } + + if ( n > 0 && printContexts ) { + out.printf("%s => had %d variant context objects%n", context.getLocation(), n); + out.printf("---------------------------------------------%n"); + } + + return n; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java index e8d4b8ada..55e413374 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java @@ -319,12 +319,6 @@ public class VariantEval2Walker extends RodWalker { return null; } - private > List 
sorted(Collection c ) { - List l = new ArrayList(c); - Collections.sort(l); - return l; - } - private final static String CONTEXT_HEADER = "track.subset.novelty.filter"; private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length; private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS]; @@ -335,9 +329,9 @@ public class VariantEval2Walker extends RodWalker { } private void determineContextNamePartSizes() { - for ( String contextName : sorted(contexts.keySet()) ) { + for ( String contextName : Utils.sorted(contexts.keySet()) ) { EvaluationContext group = contexts.get(contextName); - for ( String evalSubgroupName : sorted(group.keySet()) ) { + for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) { String keyWord = contextName + "." + evalSubgroupName; String[] parts = keyWord.split("\\."); if ( parts.length != N_CONTEXT_NAME_PARTS ) { @@ -372,11 +366,11 @@ public class VariantEval2Walker extends RodWalker { boolean first = true; out.printf("%n%n"); // todo -- show that comp is dbsnp, etc. is columns - for ( String contextName : sorted(contexts.keySet()) ) { + for ( String contextName : Utils.sorted(contexts.keySet()) ) { EvaluationContext group = contexts.get(contextName); out.printf("%s%n", Utils.dupString('-', 80)); - for ( String evalSubgroupName : sorted(group.keySet()) ) { + for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) { Set evalSet = group.get(evalSubgroupName); VariantEvaluator eval = getEvalByName(evalName, evalSet); String keyWord = contextName + "." 
+ evalSubgroupName; diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index b89d06f92..8d44c08fc 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -567,7 +567,35 @@ public class Utils { return str.charAt(0); } + public static > List sorted(Collection c) { + List l = new ArrayList(c); + Collections.sort(l); + return l; + } + public static , V> List sorted(Map c) { + List t = new ArrayList(c.keySet()); + Collections.sort(t); + + List l = new ArrayList(); + for ( T k : t ) { + l.add(c.get(k)); + } + return l; + } + + public static , V> String sortedString(Map c) { + List t = new ArrayList(c.keySet()); + Collections.sort(t); + + List l = new ArrayList(); + List pairs = new ArrayList(); + for ( T k : t ) { + pairs.add(k + "=" + c.get(k)); + } + + return "{" + join(", ", pairs) + "}"; + } }