From 825e6c7a4df3f85afdf8a582936b199c450c1df8 Mon Sep 17 00:00:00 2001 From: kcibul Date: Wed, 14 Oct 2009 20:32:26 +0000 Subject: [PATCH] added calculation for bases over 2x,10x,20x,30x plus gene name git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1844 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/HybSelPerformanceWalker.java | 67 ++++++++++++++++--- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java index 05bd37ec5..ea9fff83a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HybSelPerformanceWalker.java @@ -5,10 +5,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.util.List; @@ -40,20 +41,41 @@ public class HybSelPerformanceWalker extends LocusWalker refseqIterator=null; + public static class TargetInfo { public int counts = 0; // did at least two reads hit this target public boolean hitTwice = false; - // TODO: track max and min? - // TODO: median rather than average? - // TODO: bin into segments? (requires knowing position) + public int positionsOver2x = 0; + public int positionsOver10x = 0; + public int positionsOver20x = 0; + public int positionsOver30x = 0; } // @Argument(fullName="suppressLocusPrinting",required=false,defaultValue="false") // public boolean suppressPrinting; + @Override + public void initialize() { + if ( REFSEQ_FILE != null ) { + ReferenceOrderedData refseq = new ReferenceOrderedData("refseq", + new java.io.File(REFSEQ_FILE), rodRefSeq.class); + + refseqIterator = refseq.iterator(); + logger.info("Using RefSeq annotations from "+REFSEQ_FILE); + } + + if ( refseqIterator == null ) logger.info("No annotations available"); + + } + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { List reads = context.getReads(); @@ -91,7 +113,10 @@ public class HybSelPerformanceWalker extends LocusWalker= 2) { sum.hitTwice = true; } + if (value >= 2) { sum.hitTwice = true; sum.positionsOver2x++;} + if (value >= 10) { sum.positionsOver10x++; } + if (value >= 20) { sum.positionsOver20x++; } + if (value >= 30) { sum.positionsOver30x++; } return sum; } @@ -100,7 +125,7 @@ public class HybSelPerformanceWalker extends LocusWalker> results) { - out.println("location\tlength\tgc\tavg_coverage\tnormalized_coverage\thit_twice\tfreestanding\tboosted"); + out.println("location\tlength\tgc\tavg_coverage\tnormalized_coverage\thit_twice\tfreestanding\tboosted\tbases_over_2x\tbases_over_10x\tbases_over_20x\tbases_over_30x\tgene_name"); // first zip through and build an overlap detector of all the intervals, so later // we can calculate if this interval is free-standing @@ -157,9 +182,19 @@ public class HybSelPerformanceWalker extends LocusWalker 0); - out.printf("%s:%d-%d\t%d\t%6.4f\t%6.4f\t%6.4f\t%d\t%d\t%d\n", + // look up the gene name info + String geneName = getGeneName(target); + + + + out.printf("%s:%d-%d\t%d\t%6.4f\t%6.4f\t%6.4f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\n", target.getContig(), target.getStart(), target.getStop(), length, gc, - avgCoverage, normCoverage, ((ti.hitTwice)?1:0), ((freestanding)?1:0), ((boosted)?1:0) + avgCoverage, normCoverage, ((ti.hitTwice)?1:0), ((freestanding)?1:0), ((boosted)?1:0), + ti.positionsOver2x, + ti.positionsOver10x, + ti.positionsOver20x, + ti.positionsOver30x, + geneName ); @@ -170,6 +205,22 @@ public class HybSelPerformanceWalker extends LocusWalker annotationList = refseqIterator.seekForward(target); + if (annotationList == null) { return ""; } + + for(rodRefSeq rec : annotationList) { + if ( rec.overlapsExonP(target) ) { + return rec.getGeneName(); + } + } + + return ""; + + } + IndexedFastaSequenceFile seqFile = null; private double calculateGC(GenomeLoc target) {