Fixing up testVariantContext for the variant context integration tests. Printing of VCs and genotypes is now stable because the output is sorted. Cleaned up comments in quality score by strand. RefMetaDataTracker now directly allows walkers to obtain VariantContexts using the simple Collection<VariantContext> getAllVariantContexts(GenomeLoc curLocation, EnumSet<VariantContext.Type> allowedTypes, boolean requireStartHere, boolean takeFirstOnly) function. VCF and dbSNP VariantContexts are now officially supported. Other important types can be added to the adaptor system in the refdata package. Integration tests will follow later today.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2791 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0d8d6e0a14
commit
af8c47fc2f
|
|
@ -75,7 +75,7 @@ import java.util.Collection;
|
|||
|
||||
* @author ebanks, depristo
|
||||
*/
|
||||
public class Allele {
|
||||
public class Allele implements Comparable<Allele> {
|
||||
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
|
||||
|
||||
private boolean isRef = false;
|
||||
|
|
@ -302,4 +302,13 @@ public class Allele {
|
|||
|
||||
return myAlleles;
|
||||
}
|
||||
|
||||
public int compareTo(Allele other) {
|
||||
if ( isReference() && other.isNonReference() )
|
||||
return -1;
|
||||
else if ( isNonReference() && other.isReference() )
|
||||
return 1;
|
||||
else
|
||||
return new String(getBases()).compareTo(new String(other.getBases())); // todo -- potential performance issue
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getAlleles(), getType(), 10 * getNegLog10PError(), getAttributes());
|
||||
return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getAlleles(), getType(), 10 * getNegLog10PError(), Utils.sortedString(getAttributes()));
|
||||
}
|
||||
|
||||
public String toBriefString() {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -830,8 +831,9 @@ public class VariantContext {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s",
|
||||
getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values());
|
||||
return String.format("[VC %s @ %s of type=%s alleles=%s attr=%s GT=%s",
|
||||
getName(), getLocation(), this.getType(),
|
||||
Utils.sorted(this.getAlleles()), Utils.sortedString(this.getAttributes()), Utils.sorted(this.getGenotypes()));
|
||||
}
|
||||
|
||||
// protected basic manipulation routines
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -193,6 +195,41 @@ public class RefMetaDataTracker {
|
|||
|
||||
return bound;
|
||||
}
|
||||
|
||||
|
||||
public Collection<VariantContext> getAllVariantContexts(GenomeLoc curLocation) {
|
||||
return getAllVariantContexts(curLocation, null, false, false);
|
||||
}
|
||||
|
||||
public Collection<VariantContext> getAllVariantContexts(GenomeLoc curLocation, EnumSet<VariantContext.Type> allowedTypes, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
List<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
for ( RODRecordList<ReferenceOrderedDatum> rodList : getBoundRodTracks() ) {
|
||||
for ( ReferenceOrderedDatum rec : rodList.getRecords() ) {
|
||||
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec) ) {
|
||||
// ok, we might actually be able to turn this record in a variant context
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec);
|
||||
|
||||
// now, let's decide if we want to keep it
|
||||
boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType());
|
||||
boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart();
|
||||
|
||||
if ( goodType && goodPos ) { // ok, we are going to keep this thing
|
||||
contexts.add(vc);
|
||||
|
||||
if ( takeFirstOnly )
|
||||
// we only want the first passing instance, so break the loop over records in rodList
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Binds the list of reference ordered data records (RODs) to track name at this site. Should be used only by the traversal
|
||||
* system to provide access to RODs in a structured way to the walkers.
|
||||
|
|
|
|||
|
|
@ -21,13 +21,6 @@ import java.util.HashMap;
|
|||
import java.io.PrintWriter;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: Dec 15, 2009
|
||||
* Time: 11:56:22 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
/*
|
||||
* This walker prints out quality score counts for forward and reverse stranded reads aggregated over all loci
|
||||
* in the interval. Furthermore, it prints out quality score counts at a particular offset of forward and reverse
|
||||
* reads, aggregated across all paired-end reads in the interval.
|
||||
|
|
|
|||
|
|
@ -2,56 +2,48 @@ package org.broadinstitute.sting.oneoffprojects.walkers;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
|
||||
* Test routine for new VariantContext object
|
||||
*/
|
||||
public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
|
||||
@Argument(fullName="takeFirstOnly", doc="Only take the first second at a locus, as opposed to all", required=false)
|
||||
boolean takeFirstOnly = false;
|
||||
|
||||
@Argument(fullName="onlyContextsOfType", doc="Only take variant contexts of this type", required=false)
|
||||
VariantContext.Type onlyOfThisType = null;
|
||||
|
||||
@Argument(fullName="onlyContextsStartinAtCurrentPosition", doc="Only take variant contexts at actually start at the current position, excluding those at span to the current location but start earlier", required=false)
|
||||
boolean onlyContextsStartinAtCurrentPosition = false;
|
||||
|
||||
@Argument(fullName="printPerLocus", doc="If true, we'll print the variant contexts, in addition to counts", required=false)
|
||||
boolean printContexts = false;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( ref == null )
|
||||
return 0;
|
||||
else {
|
||||
// todo -- this should just invoke the new RefMetaDataConverter, and print out all of the info
|
||||
// RODRecordList<ReferenceOrderedDatum> dbsnpList = tracker.getTrackData("dbsnp", null);
|
||||
//
|
||||
// if (dbsnpList != null) {
|
||||
// // do dbSNP conversion
|
||||
// int n = 0;
|
||||
// for (ReferenceOrderedDatum d : dbsnpList) {
|
||||
// rodDbSNP dbsnpRecord = (rodDbSNP)d;
|
||||
// if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
|
||||
// VariantContext vc = VariantContextAdaptors.convertToVariantContext("dbsnp", dbsnpRecord);
|
||||
// if ( vc != null ) {
|
||||
// n++;
|
||||
// System.out.printf("%s%n", vc);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return n;
|
||||
// }
|
||||
//
|
||||
// RODRecordList<ReferenceOrderedDatum> vcfList = tracker.getTrackData("vcf", null);
|
||||
// if (vcfList != null) {
|
||||
// // do vcf conversion
|
||||
// int n = 0;
|
||||
// for (ReferenceOrderedDatum d : vcfList) {
|
||||
// RodVCF vcfRecord = (RodVCF)d;
|
||||
// VariantContext vc = VariantContextAdaptors.convertToVariantContext("vcf", vcfRecord);
|
||||
// if ( vc != null ) {
|
||||
// n++;
|
||||
// System.out.printf("%s%n", vc);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return n;
|
||||
// }
|
||||
EnumSet<VariantContext.Type> allowedTypes = onlyOfThisType == null ? null : EnumSet.of(onlyOfThisType);
|
||||
|
||||
return 0;
|
||||
int n = 0;
|
||||
for (VariantContext vc : tracker.getAllVariantContexts(context.getLocation(), allowedTypes, onlyContextsStartinAtCurrentPosition, takeFirstOnly) ) {
|
||||
n++;
|
||||
if ( printContexts ) out.printf(" %s%n", vc);
|
||||
}
|
||||
|
||||
if ( n > 0 && printContexts ) {
|
||||
out.printf("%s => had %d variant context objects%n", context.getLocation(), n);
|
||||
out.printf("---------------------------------------------%n");
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -319,12 +319,6 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
return null;
|
||||
}
|
||||
|
||||
private <T extends Comparable<T>> List<T> sorted(Collection<T> c ) {
|
||||
List<T> l = new ArrayList<T>(c);
|
||||
Collections.sort(l);
|
||||
return l;
|
||||
}
|
||||
|
||||
private final static String CONTEXT_HEADER = "track.subset.novelty.filter";
|
||||
private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length;
|
||||
private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS];
|
||||
|
|
@ -335,9 +329,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
private void determineContextNamePartSizes() {
|
||||
for ( String contextName : sorted(contexts.keySet()) ) {
|
||||
for ( String contextName : Utils.sorted(contexts.keySet()) ) {
|
||||
EvaluationContext group = contexts.get(contextName);
|
||||
for ( String evalSubgroupName : sorted(group.keySet()) ) {
|
||||
for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) {
|
||||
String keyWord = contextName + "." + evalSubgroupName;
|
||||
String[] parts = keyWord.split("\\.");
|
||||
if ( parts.length != N_CONTEXT_NAME_PARTS ) {
|
||||
|
|
@ -372,11 +366,11 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
|
|||
boolean first = true;
|
||||
out.printf("%n%n");
|
||||
// todo -- show that comp is dbsnp, etc. is columns
|
||||
for ( String contextName : sorted(contexts.keySet()) ) {
|
||||
for ( String contextName : Utils.sorted(contexts.keySet()) ) {
|
||||
EvaluationContext group = contexts.get(contextName);
|
||||
|
||||
out.printf("%s%n", Utils.dupString('-', 80));
|
||||
for ( String evalSubgroupName : sorted(group.keySet()) ) {
|
||||
for ( String evalSubgroupName : Utils.sorted(group.keySet()) ) {
|
||||
Set<VariantEvaluator> evalSet = group.get(evalSubgroupName);
|
||||
VariantEvaluator eval = getEvalByName(evalName, evalSet);
|
||||
String keyWord = contextName + "." + evalSubgroupName;
|
||||
|
|
|
|||
|
|
@ -567,7 +567,35 @@ public class Utils {
|
|||
return str.charAt(0);
|
||||
}
|
||||
|
||||
public static <T extends Comparable<T>> List<T> sorted(Collection<T> c) {
|
||||
List<T> l = new ArrayList<T>(c);
|
||||
Collections.sort(l);
|
||||
return l;
|
||||
}
|
||||
|
||||
public static <T extends Comparable<T>, V> List<V> sorted(Map<T,V> c) {
|
||||
List<T> t = new ArrayList<T>(c.keySet());
|
||||
Collections.sort(t);
|
||||
|
||||
List<V> l = new ArrayList<V>();
|
||||
for ( T k : t ) {
|
||||
l.add(c.get(k));
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
public static <T extends Comparable<T>, V> String sortedString(Map<T,V> c) {
|
||||
List<T> t = new ArrayList<T>(c.keySet());
|
||||
Collections.sort(t);
|
||||
|
||||
List<V> l = new ArrayList<V>();
|
||||
List<String> pairs = new ArrayList<String>();
|
||||
for ( T k : t ) {
|
||||
pairs.add(k + "=" + c.get(k));
|
||||
}
|
||||
|
||||
return "{" + join(", ", pairs) + "}";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue