added calculation for bases over 2x,10x,20x,30x plus gene name
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1844 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
727b69fce0
commit
825e6c7a4d
|
|
@ -5,10 +5,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.By;
|
import org.broadinstitute.sting.gatk.walkers.By;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -40,20 +41,41 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
|
||||||
@Argument(fullName="booster_distance", required=false, doc="distance up to which a booster can affect a target")
|
@Argument(fullName="booster_distance", required=false, doc="distance up to which a booster can affect a target")
|
||||||
public Integer BOOSTER_DISTANCE = 100; // how far away can a booster be to "hit" its target?
|
public Integer BOOSTER_DISTANCE = 100; // how far away can a booster be to "hit" its target?
|
||||||
|
|
||||||
|
@Argument(fullName="refseq", shortName="refseq",
|
||||||
|
doc="Name of RefSeq transcript annotation file. If specified, intervals will be specified with gene names", required=false)
|
||||||
|
String REFSEQ_FILE = null;
|
||||||
|
|
||||||
|
private SeekableRODIterator<rodRefSeq> refseqIterator=null;
|
||||||
|
|
||||||
public static class TargetInfo {
|
public static class TargetInfo {
|
||||||
public int counts = 0;
|
public int counts = 0;
|
||||||
|
|
||||||
// did at least two reads hit this target
|
// did at least two reads hit this target
|
||||||
public boolean hitTwice = false;
|
public boolean hitTwice = false;
|
||||||
|
|
||||||
// TODO: track max and min?
|
public int positionsOver2x = 0;
|
||||||
// TODO: median rather than average?
|
public int positionsOver10x = 0;
|
||||||
// TODO: bin into segments? (requires knowing position)
|
public int positionsOver20x = 0;
|
||||||
|
public int positionsOver30x = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Argument(fullName="suppressLocusPrinting",required=false,defaultValue="false")
|
// @Argument(fullName="suppressLocusPrinting",required=false,defaultValue="false")
|
||||||
// public boolean suppressPrinting;
|
// public boolean suppressPrinting;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
if ( REFSEQ_FILE != null ) {
|
||||||
|
ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
|
||||||
|
new java.io.File(REFSEQ_FILE), rodRefSeq.class);
|
||||||
|
|
||||||
|
refseqIterator = refseq.iterator();
|
||||||
|
logger.info("Using RefSeq annotations from "+REFSEQ_FILE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( refseqIterator == null ) logger.info("No annotations available");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
List<SAMRecord> reads = context.getReads();
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
|
||||||
|
|
@ -91,7 +113,10 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
|
||||||
|
|
||||||
public TargetInfo reduce(Integer value, TargetInfo sum) {
|
public TargetInfo reduce(Integer value, TargetInfo sum) {
|
||||||
sum.counts += value;
|
sum.counts += value;
|
||||||
if (value >= 2) { sum.hitTwice = true; }
|
if (value >= 2) { sum.hitTwice = true; sum.positionsOver2x++;}
|
||||||
|
if (value >= 10) { sum.positionsOver10x++; }
|
||||||
|
if (value >= 20) { sum.positionsOver20x++; }
|
||||||
|
if (value >= 30) { sum.positionsOver30x++; }
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -100,7 +125,7 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onTraversalDone(List<Pair<GenomeLoc, TargetInfo>> results) {
|
public void onTraversalDone(List<Pair<GenomeLoc, TargetInfo>> results) {
|
||||||
out.println("location\tlength\tgc\tavg_coverage\tnormalized_coverage\thit_twice\tfreestanding\tboosted");
|
out.println("location\tlength\tgc\tavg_coverage\tnormalized_coverage\thit_twice\tfreestanding\tboosted\tbases_over_2x\tbases_over_10x\tbases_over_20x\tbases_over_30x\tgene_name");
|
||||||
|
|
||||||
// first zip through and build an overlap detector of all the intervals, so later
|
// first zip through and build an overlap detector of all the intervals, so later
|
||||||
// we can calculate if this interval is free-standing
|
// we can calculate if this interval is free-standing
|
||||||
|
|
@ -157,9 +182,19 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
|
||||||
|
|
||||||
boolean boosted = (booster.getOverlaps(targetInterval).size() > 0);
|
boolean boosted = (booster.getOverlaps(targetInterval).size() > 0);
|
||||||
|
|
||||||
out.printf("%s:%d-%d\t%d\t%6.4f\t%6.4f\t%6.4f\t%d\t%d\t%d\n",
|
// look up the gene name info
|
||||||
|
String geneName = getGeneName(target);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
out.printf("%s:%d-%d\t%d\t%6.4f\t%6.4f\t%6.4f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\n",
|
||||||
target.getContig(), target.getStart(), target.getStop(), length, gc,
|
target.getContig(), target.getStart(), target.getStop(), length, gc,
|
||||||
avgCoverage, normCoverage, ((ti.hitTwice)?1:0), ((freestanding)?1:0), ((boosted)?1:0)
|
avgCoverage, normCoverage, ((ti.hitTwice)?1:0), ((freestanding)?1:0), ((boosted)?1:0),
|
||||||
|
ti.positionsOver2x,
|
||||||
|
ti.positionsOver10x,
|
||||||
|
ti.positionsOver20x,
|
||||||
|
ti.positionsOver30x,
|
||||||
|
geneName
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -170,6 +205,22 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
|
||||||
return new Interval(target.getContig(), (int) target.getStart(), (int) target.getStop());
|
return new Interval(target.getContig(), (int) target.getStart(), (int) target.getStop());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getGeneName(GenomeLoc target) {
|
||||||
|
if (refseqIterator == null) { return ""; }
|
||||||
|
|
||||||
|
RODRecordList<rodRefSeq> annotationList = refseqIterator.seekForward(target);
|
||||||
|
if (annotationList == null) { return ""; }
|
||||||
|
|
||||||
|
for(rodRefSeq rec : annotationList) {
|
||||||
|
if ( rec.overlapsExonP(target) ) {
|
||||||
|
return rec.getGeneName();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return "";
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
IndexedFastaSequenceFile seqFile = null;
|
IndexedFastaSequenceFile seqFile = null;
|
||||||
|
|
||||||
private double calculateGC(GenomeLoc target) {
|
private double calculateGC(GenomeLoc target) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue