diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index d2a331766..ae6b04770 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -159,17 +159,9 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { * @return */ private long getSkippedBases( GenomeLoc currentPos ) { - long skippedBases = 0; - - if ( lastLoc == null ) { - // special case -- we're at the start - //System.out.printf("Cur=%s, shard=%s%n", currentPos, shard.getGenomeLoc()); - GenomeLoc firstLoc = shard.getGenomeLocs().get(0); - skippedBases = currentPos.getStart() - firstLoc.getStart(); - } else { - //System.out.printf("Cur=%s, last=%s%n", currentPos, lastLoc); - skippedBases = currentPos.minus(lastLoc) - 1; - } + // the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped + Long compStop = lastLoc == null ? shard.getGenomeLocs().get(0).getStart() - 1 : lastLoc.getStop(); + long skippedBases = currentPos.getStart() - compStop - 1; if ( skippedBases < -1 ) { // minus 1 value is ok throw new RuntimeException(String.format("BUG: skipped bases=%d is < 0: cur=%s vs. last=%s, shard=%s", diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index cfb9aca2e..049bb9880 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -109,7 +109,6 @@ public class TraverseLoci extends TraversalEngine { RodLocusView rodLocusView = (RodLocusView)locusView; long nSkipped = rodLocusView.getLastSkippedBases(); if ( nSkipped > 0 ) { - // no sense in making the call if you don't have anything interesting to say GenomeLoc site = rodLocusView.getLocOneBeyondShard(); AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileup(site), nSkipped); M x = locusWalker.map(null, null, ac); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/TestVariantContextWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/TestVariantContextWalker.java index e1b59e33d..c524fbf48 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/TestVariantContextWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/TestVariantContextWalker.java @@ -23,7 +23,7 @@ public class TestVariantContextWalker extends RodWalker { for (ReferenceOrderedDatum d : dbsnpList) { rodDbSNP dbsnpRecord = (rodDbSNP)d; if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) { - VariantContext vc = VariantContextAdaptors.dbsnpToVariantContext(dbsnpRecord); + VariantContext vc = VariantContextAdaptors.convertToVariantContext(dbsnpRecord); if ( vc != null ) { n++; System.out.printf("%s%n", vc); @@ -40,7 +40,7 @@ public class TestVariantContextWalker extends RodWalker { int n = 0; for (ReferenceOrderedDatum d : vcfList) { RodVCF vcfRecord = (RodVCF)d; - VariantContext vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord); + VariantContext vc = VariantContextAdaptors.convertToVariantContext(vcfRecord); if ( vc != null ) { n++; System.out.printf("%s%n", vc); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java index a3518bef5..b7b4ddf3f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java @@ -17,10 +17,10 @@ import java.util.*; */ public class VariantContext extends AttributedObject { private GenomeLoc loc; - Type type = Type.UNDETERMINED; + private Type type = Type.UNDETERMINED; private Set alleles = new HashSet(); private Map genotypes = new HashMap(); - Set filters = new HashSet(); + private Set filters = new HashSet(); // --------------------------------------------------------------------------------------------------------- // diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java index 0f50513ff..4a19432a0 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java @@ -2,20 +2,39 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.RodVCF; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; +import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import java.util.*; public class VariantContextAdaptors { - public static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) { + public static boolean canBeConvertedToVariantContext(Object variantContainingObject) { + return convertToVariantContext(variantContainingObject) != null; + } + + public static VariantContext convertToVariantContext(Object variantContainingObject) { + if ( variantContainingObject instanceof rodDbSNP ) + return dbsnpToVariantContext((rodDbSNP)variantContainingObject); + else if ( variantContainingObject instanceof RodVCF ) + return vcfToVariantContext(((RodVCF)variantContainingObject).getRecord()); + else if ( variantContainingObject instanceof VCFRecord ) + return vcfToVariantContext((VCFRecord)variantContainingObject); + else + return null; + //throw new IllegalArgumentException("Cannot convert object " + variantContainingObject + " of class " + variantContainingObject.getClass() + " to a variant context"); + + } + + private static VariantContext dbsnpToVariantContext(rodDbSNP dbsnp) { if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) { VariantContext vc = new VariantContext(dbsnp.getLocation()); // add the reference allele if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) { - System.out.printf("Excluding dbsnp record %s%n", dbsnp); + //System.out.printf("Excluding dbsnp record %s%n", dbsnp); return null; } @@ -25,7 +44,7 @@ public class VariantContextAdaptors { // add all of the alt alleles for ( String alt : dbsnp.getAlternateAlleleList() ) { if ( ! Allele.acceptableAlleleBases(alt) ) { - System.out.printf("Excluding dbsnp record %s%n", dbsnp); + //System.out.printf("Excluding dbsnp record %s%n", dbsnp); return null; } vc.addAllele(new Allele(alt, false)); @@ -37,7 +56,7 @@ public class VariantContextAdaptors { return null; // can't handle anything else } - public static VariantContext vcfToVariantContext(RodVCF vcf) { + private static VariantContext vcfToVariantContext(VCFRecord vcf) { if ( vcf.isSNP() || vcf.isIndel() ) { VariantContext vc = new VariantContext(vcf.getLocation()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java index 4268aba4d..b7ca0bfeb 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java @@ -2,9 +2,43 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext; import java.util.*; import org.apache.commons.jexl.*; +import org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2.VariantEvaluator; +import org.broadinstitute.sting.utils.StingException; public class VariantContextUtils { + /** */ + public static class MatchExp { + String name; + String expStr; + Expression exp; + + public MatchExp(String name, String str, Expression exp) { + this.name = name; + this.expStr = str; + this.exp = exp; + } + } + + public static List initializeMatchExps(Map names_and_exps) { + List exps = new ArrayList(); + + for ( Map.Entry elt : names_and_exps.entrySet() ) { + String name = elt.getKey(); + String expStr = elt.getValue(); + + if ( name == null || expStr == null ) throw new IllegalArgumentException("Cannot create null expressions : " + name + " " + expStr); + try { + Expression filterExpression = ExpressionFactory.createExpression(expStr); + exps.add(new MatchExp(name, expStr, filterExpression)); + } catch (Exception e) { + throw new StingException("Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax."); + } + } + + return exps; + } + private static final String UNIQUIFIED_SUFFIX = ".unique"; /** diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java new file mode 100755 index 000000000..b0a22e4e5 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/CountVariants.java @@ -0,0 +1,113 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; + +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; +import org.broadinstitute.sting.oneoffprojects.variantcontext.Genotype; +import org.broadinstitute.sting.utils.StingException; + +import java.util.List; +import java.util.Arrays; + +public class CountVariants extends VariantEvaluator { + int nProcessedLoci = 0; + int nCalledLoci = 0; + int nVariantLoci = 0; + int nRefLoci = 0; + + int nSNPs = 0; + int nInsertions = 0; + int nDeletions = 0; + int nComplex = 0; + + int nNoCalls = 0; + int nHets = 0; + int nHomRef = 0; + int nHomVar = 0; + + private double rate(long n) { + return n / (1.0 * Math.max(nProcessedLoci, 1)); + } + + private long inverseRate(long n) { + return n == 0 ? 0 : nProcessedLoci / Math.max(n, 1); + } + + private double ratio(long num, long denom) { + return ((double)num) / (Math.max(denom, 1)); + } + + public String getName() { + return "Counter"; + } + + public int getComparisonOrder() { + return 1; // we only need to see each eval track + } + + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + nProcessedLoci += context.getSkippedBases() + 1; + } + + public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + //nProcessedLoci++; + nCalledLoci++; + + if ( vc1.isVariant() ) nVariantLoci++; + switch ( vc1.getType() ) { + case NO_VARIATION: nRefLoci++; break; + case SNP: nSNPs++; break; + case INDEL: + if ( vc1.isInsertion() ) nInsertions++; else nDeletions++; + break; + case MIXED: nComplex++; break; + } + + for ( Genotype g : vc1.getGenotypes().values() ) { + switch ( g.getType() ) { + case NO_CALL: nNoCalls++; break; + case HOM_REF: nHomRef++; break; + case HET: nHets++; break; + case HOM_VAR: nHomVar++; break; + default: throw new StingException("BUG: Unexpected genotype type: " + g); + } + } + } + + public String toString() { + return "Counter: " + summaryLine(); + } + + private String summaryLine() { + return String.format("%d %d %d %d " + + "%.2e %d " + + "%d %d %d %d " + + "%d %d %d " + + "%.2e %d %.2f " + + "%.2f %d %.2f", + nProcessedLoci, nCalledLoci, nRefLoci, nVariantLoci, + rate(nVariantLoci), inverseRate(nVariantLoci), + nSNPs, nDeletions, nInsertions, nComplex, + nHomRef, nHets, nHomVar, + rate(nHets), inverseRate(nHets), ratio(nHets, nHomVar), + rate(nDeletions + nInsertions), inverseRate(nDeletions + nInsertions), ratio(nDeletions, nInsertions)); + } + + private static List HEADER = + Arrays.asList("nProcessedLoci", "nCalledLoci", "nRefLoci", "nVariantLoci", + "variantRate", "variantRatePerBp", + "nSNPs", "nDeletions", "nInsertions", "nComplex", + "nHomRefGenotypes", "nHetGenotypes", "nHomVarGenotypes", + "heterozygosity", "heterozygosityPerBp", "hetHomRatio", + "indelRate", "indelRatePerBp", "deletionInsertionRatio"); + + // making it a table + public List getTableHeader() { + return HEADER; + } + + public List> getTableRows() { + return Arrays.asList(Arrays.asList(summaryLine().split(" "))); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java new file mode 100755 index 000000000..0eed3a161 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEval2Walker.java @@ -0,0 +1,241 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; +import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors; +import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextUtils; + +import java.util.*; + +/** + * Test routine for new VariantContext object + */ +public class VariantEval2Walker extends RodWalker { + // todo -- add doc string + @Argument(shortName="select", doc="", required=false) + protected String[] SELECT_STRINGS = {}; + // todo -- add doc string + @Argument(shortName="selectName", doc="", required=false) + protected String[] SELECT_NAMES = {}; + + @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) + protected String[] KNOWN_NAMES = {"dbsnp"}; + + + private class EvaluationGroup extends HashMap> { + // useful for typing + } + + // todo -- generalize to multiple contexts, one for each eval + private HashMap contexts = new HashMap(); + private List selectExps = null; + + public void initialize() { + if ( SELECT_NAMES.length != SELECT_STRINGS.length ) + throw new StingException("Inconsistent number of provided filter names and expressions."); + Map map = new HashMap(); + for ( int i = 0; i < SELECT_NAMES.length; i++ ) { map.put(SELECT_NAMES[i], SELECT_STRINGS[i]); } + + selectExps = VariantContextUtils.initializeMatchExps(map); + + // setup contexts + // todo -- add selects + contexts.put("eval", createEvaluationGroup()); + contexts.put("eval.filtered", createEvaluationGroup()); + } + + private EvaluationGroup createEvaluationGroup() { + EvaluationGroup group = new EvaluationGroup(); + + for ( String name : Arrays.asList("all", "known", "novel") ) { + group.put(name, new HashSet(Arrays.asList(new CountVariants()))); + } + + return group; + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + //System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases()); + + Map allNamedVCs = getVariantContexts(context, tracker); + Map evalNamedVCs = getEvalVCs(allNamedVCs); + Map comps = getComparisonVCs(allNamedVCs); + + for ( VariantEvaluator eval : getAllEvaluations() ) { + eval.update0(tracker, ref, context); + } + + if ( evalNamedVCs.size() > 1 ) throw new StingException("VariantEval doesn't yet support for multiple independent eval tracks"); + for ( Map.Entry evalNameVC: evalNamedVCs.entrySet() ) { + String name = evalNameVC.getKey(); + VariantContext vc = evalNameVC.getValue(); + boolean isKnown = vcIsKnown(vc, allNamedVCs); + + if ( vc.isFiltered() ) name = name + ".filtered"; + EvaluationGroup group = contexts.get(name); + + for ( Set evaluations : Arrays.asList(group.get("all"), group.get(isKnown ? "known" : "novel")) ) { + for ( VariantEvaluator evaluation : evaluations ) { + switch ( evaluation.getComparisonOrder() ) { + case 1: + evaluation.update1(vc, tracker, ref, context); + break; + case 2: + for ( VariantContext comp : comps.values() ) { + evaluation.update2(vc, comp, tracker, ref, context); + } + break; + default: + throw new StingException("BUG: Unexpected evaluation order " + evaluation); + } + } + } + } + + return 0; + } + + private boolean vcIsKnown(VariantContext vc, Map vcs) { + for ( VariantContext known : getKnownVCs(vcs).values() ) { + if ( known.isNotFiltered() && known.getType() == vc.getType() ) + return true; + } + + return false; + } + + + private Map getEvalVCs(Map vcs) { + return getVCsStartingWith(vcs, false); + } + + private Map getComparisonVCs(Map vcs) { + return getVCsStartingWith(vcs, true); + } + + private Map getVCsStartingWith(Map vcs, boolean notStartsWith) { + Map map = new HashMap(); + + for ( Map.Entry elt : vcs.entrySet() ) { + boolean startP = elt.getKey().startsWith("eval"); + + if ( (startP && ! notStartsWith) || (!startP && notStartsWith) ) { + map.put(elt.getKey(), elt.getValue()); + } + } + + return map; + } + + private Map getKnownVCs(Map vcs) { + Map map = new HashMap(); + + for ( Map.Entry elt : vcs.entrySet() ) { + for ( String known1 : KNOWN_NAMES ) { + if ( elt.getKey().equals(known1) ) { + map.put(elt.getKey(), elt.getValue()); + } + } + } + + return map; + } + + private List getAllEvaluationNames() { + List names = new ArrayList(); + if ( contexts.size() == 0 ) throw new IllegalStateException("Contexts shouldn't be sized 0 when calling getAllEvaluationNames()"); + + for ( VariantEvaluator eval : contexts.values().iterator().next().get("all") ) { + names.add(eval.getName()); + } + + return names; + } + + + private Map getVariantContexts(AlignmentContext context, RefMetaDataTracker tracker) { + Map map = new HashMap(); + + if ( tracker != null ) { + for ( RODRecordList rodList : tracker.getBoundRodTracks() ) { + boolean alreadyGrabbedOne = false; + + for ( ReferenceOrderedDatum rec : rodList.getRecords() ) { + if ( rec.getLocation().getStart() == context.getLocation().getStart() ) { + // ignore things that span this location but started earlier + if ( alreadyGrabbedOne ) { + // can't handle this situation + ; + //logger.info(String.format("Ignore second+ events at locus %s in rod %s => rec is %s", context.getLocation(), rodList.getName(), rec)); + } else { + VariantContext vc = VariantContextAdaptors.convertToVariantContext(rec); + if ( vc != null ) { + alreadyGrabbedOne = true; + map.put(rec.getName(), vc); + } + } + } + } + } + } + + return map; + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer point, Integer sum) { + return point + sum; + } + + public VariantEvaluator getEvalByName(String name, Set s) { + for ( VariantEvaluator e : s ) + if ( e.getName().equals(name) ) + return e; + return null; + } + + + public List getAllEvaluations() { + List l = new ArrayList(); + + for ( EvaluationGroup group : contexts.values() ) { + for ( Set evals : group.values() ) { + l.addAll(evals); + } + } + + return l; + } + + + public void onTraversalDone(Integer result) { + for ( String evalName : getAllEvaluationNames() ) { + boolean first = true; + for ( Map.Entry elt : contexts.entrySet() ) { + String contextName = elt.getKey(); + EvaluationGroup group = elt.getValue(); + + for ( Map.Entry> namedEvalGroup : group.entrySet() ) { + String evalSubgroupName = namedEvalGroup.getKey(); + VariantEvaluator eval = getEvalByName(evalName, namedEvalGroup.getValue()); + String keyWord = contextName + "." + evalSubgroupName; + if ( first ) { + out.printf("%s\t%s\t%s%n", evalName, "context", Utils.join("\t", eval.getTableHeader())); + first = false; + } + + for ( List row : eval.getTableRows() ) + out.printf("%s\t%s\t%s%n", evalName, keyWord, Utils.join("\t", row)); + } + } + } + } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java new file mode 100755 index 000000000..35fd84af2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/varianteval2/VariantEvaluator.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2; + +import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; + +import java.util.List; +import java.util.ArrayList; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: Jan 29, 2010 + * Time: 3:38:02 PM + * To change this template use File | Settings | File Templates. + */ +public abstract class VariantEvaluator { + protected boolean accumulateInterestingSites = false; + protected boolean processedASite = false; + protected List interestingSites = new ArrayList(); + + public abstract String getName(); + + // do we want to keep track of things that are interesting + public void accumulateInterestingSites(boolean enable) { accumulateInterestingSites = enable; } + public boolean isAccumulatingInterestingSites() { return accumulateInterestingSites; } + public List getInterestingSites() { return interestingSites; } + protected void addInterestingSite(String why, VariantContext vc) { interestingSites.add(vc); } + + public boolean processedAnySites() { return processedASite; } + protected void markSiteAsProcessed() { processedASite = true; } + + /** Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 */ + public abstract int getComparisonOrder(); + + /** called at all sites, regardless of eval context itself; useful for counting processed bases */ + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } + public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } + public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } + + public void finalize() {} + + public abstract String toString(); + + // making it a table + public abstract List getTableHeader(); + public abstract List> getTableRows(); +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java index f300ae29f..c7855b277 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java @@ -8,23 +8,10 @@ import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.PackageUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis; -import org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval.VariantEvaluation; import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext; import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors; -import org.apache.commons.jexl.ExpressionFactory; -import org.apache.commons.jexl.Expression; -import org.apache.commons.jexl.JexlHelper; -import org.apache.commons.jexl.JexlContext; import java.util.*; @@ -60,7 +47,7 @@ public class SNPDensity extends RefWalker, SNPDe if (vcfList != null) { for (ReferenceOrderedDatum d : vcfList) { RodVCF vcfRecord = (RodVCF)d; - vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord); + vc = VariantContextAdaptors.convertToVariantContext(vcfRecord); break; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/CounterEvaluation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/CounterEvaluation.java deleted file mode 100755 index 1078838f8..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/CounterEvaluation.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval; - -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; - -import java.util.List; -import java.util.ArrayList; - -public class CounterEvaluation implements VariantEvaluation { - private int numVariants = 0; - private int numCalls = 0; - private int numKnown = 0; - - public CounterEvaluation() {} - - public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - numVariants += (variant != null && variant.isSNP()) ? 1 : 0; - numKnown += (variant != null && variant.isSNP() && !variant.getID().equals(".")) ? 1 : 0; - numCalls++; - } - - public String getName() { - return "Counter"; - } - - public List getResult() { - ArrayList results = new ArrayList(); - - double variantRate = ((double) numVariants)/((double) numCalls); - double knownVariantsPct = 100.0*((double) numKnown)/((double) numVariants); - - results.add(String.format("regionSize=%d", numCalls)); - results.add(String.format("variants=%d", numVariants)); - results.add(String.format("variantsKnown=%d", numKnown)); - results.add(String.format("knownVariantsPct=%f", knownVariantsPct)); - results.add(String.format("variantRate=1/%d", Math.round(1.0/variantRate))); - - return results; - } -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/NewVariantEval.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/NewVariantEval.java deleted file mode 100755 index 0667dbd6a..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/NewVariantEval.java +++ /dev/null @@ -1,153 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval; - -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.PackageUtils; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis; -import org.apache.commons.jexl.ExpressionFactory; -import org.apache.commons.jexl.Expression; -import org.apache.commons.jexl.JexlHelper; -import org.apache.commons.jexl.JexlContext; - -import java.util.*; - -@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class)) -public class NewVariantEval extends RodWalker { - @Argument(fullName="filterExpression", shortName="filter", doc="Expression used with INFO fields to filter (see wiki docs for more info)", required=false) - private String FILTER_STRING = "FILTER == false"; - - @Argument(fullName="sampleName", shortName="sample", doc="If the VCF file has multiple samples, evaluate only the specified sample", required=false) - private String SAMPLE_NAME = null; - - private Expression filterExpression; - private HashSet evals; - - public void initialize() { - try { - filterExpression = ExpressionFactory.createExpression(FILTER_STRING); - } catch (Exception e) { - throw new StingException("Invalid expression used (" + FILTER_STRING + "). Please see the JEXL docs for correct syntax."); - } - - try { - evals = new HashSet(); - - List> cevals = PackageUtils.getClassesImplementingInterface(VariantEvaluation.class); - for (Class ceval : cevals) { - out.printf("Analysis: %s%n", ceval.getName()); - - VariantEvaluation eval = (VariantEvaluation) ceval.newInstance(); - evals.add(eval); - } - } catch (InstantiationException e) { - throw new StingException(e.getMessage()); - } catch (IllegalAccessException e) { - throw new StingException(e.getMessage()); - } - } - - public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - RODRecordList rods = tracker.getTrackData("eval", null); - - if ( rods != null ) { - RodVCF variant = (RodVCF) rods.getRecords().get(0); - - Map infoMap = new HashMap(variant.mCurrentRecord.getInfoValues()); - infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual())); - infoMap.put("FILTER", String.valueOf(variant.mCurrentRecord.isFiltered())); - infoMap.put("ID", String.valueOf(variant.mCurrentRecord.getID())); - - for (String filterCode : variant.mCurrentRecord.getFilteringCodes()) { - infoMap.put(filterCode, "1"); - } - - JexlContext jContext = JexlHelper.createContext(); - jContext.setVars(infoMap); - - try { - return (Boolean) filterExpression.evaluate(jContext); - } catch (Exception e) { - throw new StingException(e.getMessage()); - } - } - - return true; - } - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - RODRecordList rods = tracker.getTrackData("eval", null); - RodVCF variant = (rods != null) ? (RodVCF) rods.getRecords().get(0) : null; - - if (variant != null && SAMPLE_NAME != null && variant.hasGenotypeData()) { - variant = selectSample(SAMPLE_NAME, variant); - - if (variant == null) { - throw new StingException(String.format("The sample '%s' is not present in the specified VCF", SAMPLE_NAME)); - } - } - - for (VariantEvaluation eval : evals) { - eval.update(variant, tracker, ref, context); - } - - return 1; - } - - private RodVCF selectSample(String sample, RodVCF variant) { - String[] samples = variant.getSampleNames(); - - for (int i = 0; i < samples.length; i++) { - if (samples[i].equalsIgnoreCase(sample)) { - List genotypeRecs = new ArrayList(); - genotypeRecs.add(variant.mCurrentRecord.getVCFGenotypeRecords().get(i)); - - variant.mCurrentRecord = new VCFRecord(variant.getReferenceForSNP(), - variant.getLocation().getContig(), - (int) variant.getLocation().getStart(), - variant.getID(), - variant.mCurrentRecord.getAlternateAlleles(), - variant.getQual(), - variant.getFilterString(), - variant.getInfoValues(), - variant.mCurrentRecord.getGenotypeFormatString(), - genotypeRecs); - - return variant; - } - } - - return null; - } - - public Integer reduceInit() { - return null; - } - - public Integer reduce(Integer value, Integer sum) { - return null; - } - - public void onTraversalDone(Integer sum) { - for (VariantEvaluation eval : evals) { - List results = eval.getResult(); - - for (String result : results) { - out.printf("%s:%s%n", eval.getName(), result); - } - } - } -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/SubstitutionEvaluation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/SubstitutionEvaluation.java deleted file mode 100755 index 411a2f26c..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/SubstitutionEvaluation.java +++ /dev/null @@ -1,65 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval; - -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.playground.utils.NamedTable; -import org.broadinstitute.sting.utils.BaseUtils; - -import java.util.List; -import java.util.ArrayList; - -public class SubstitutionEvaluation implements VariantEvaluation { - private NamedTable substitutions; - - public SubstitutionEvaluation() { - substitutions = new NamedTable(); - } - - public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (variant != null) { - List altAlleles = variant.getAlternateAlleleList(); - String altAllele = altAlleles.get(0); - String refAllele = String.format("%c", BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex())); - - substitutions.increment(refAllele, altAllele); - } - } - - public String getName() { - return "Substitution"; - } - - public List getResult() { - ArrayList results = new ArrayList(); - - int transitions = 0; - int transversions = 0; - int unknown = 0; - - for (char rbase : BaseUtils.BASES) { - String refAllele = String.format("%c", rbase); - - for (char abase : BaseUtils.BASES) { - String altAllele = String.format("%c", abase); - - if (BaseUtils.isTransition((byte)rbase, (byte)abase)) { - transitions += substitutions.get(refAllele, altAllele); - } else if (BaseUtils.isTransversion((byte)rbase, (byte)abase)) { - transversions += substitutions.get(refAllele, altAllele); - } else { - unknown += substitutions.get(refAllele, altAllele); - } - } - } - - results.add(String.format("%n%s", substitutions.toString())); - results.add(String.format("transitions=%d", transitions)); - results.add(String.format("transversions=%d", transversions)); - results.add(String.format("unknown=%d", unknown)); - results.add(String.format("titv=%f", ((double) transitions)/((double) transversions))); - - return results; - } -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/VariantEvaluation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/VariantEvaluation.java deleted file mode 100755 index 92b4b1d1e..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/newvarianteval/VariantEvaluation.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval; - -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; - -import java.util.List; - -public interface VariantEvaluation { - public void update(RodVCF variant, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context); - - //public Integer map( - - public String getName(); - - public List getResult(); -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 7062c6e84..f5a33042e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -257,6 +257,7 @@ public class VariantEvalWalker extends RodWalker { } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + //System.out.printf("Tracker at %s is %s, ref is %s, skip is %d mapped is %d%n", context.getLocation(), tracker, ref, context.getSkippedBases(), nMappedSites); nMappedSites += context.getSkippedBases(); //System.out.printf("Tracker at %s is %s, ref is %s%n", context.getLocation(), tracker, ref);