Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-08-30 08:34:01 -04:00
commit e040ea5c0d
18 changed files with 437 additions and 96 deletions

View File

@ -0,0 +1,154 @@
library(gsalib)
require("ggplot2")
require("gplots")
#
# Standard command line switch. Can we loaded interactively for development
# or executed with RScript
#
args = commandArgs(TRUE)
onCMDLine = ! is.na(args[1])
if ( onCMDLine ) {
inputFileName = args[1]
outputPDF = args[2]
} else {
inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
outputPDF = NA
}
RUNTIME_UNITS = "(sec)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000
#
# Helper function to aggregate all of the jobs in the report across all tables
#
allJobsFromReport <- function(report) {
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
sub <- lapply(report, function(table) table[,names])
do.call("rbind", sub)
}
#
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
#
plotJobsGantt <- function(gatkReport, sortOverall) {
allJobs = allJobsFromReport(gatkReport)
if ( sortOverall ) {
title = "All jobs, by analysis, by start time"
allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ]
} else {
title = "All jobs, sorted by start time"
allJobs = allJobs[order(allJobs$startTime, decreasing=T), ]
}
allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
maxRelTime = max(allJobs$relDoneTime)
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + xlim(0, maxRelTime * 1.1)
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
p <- p + ylab("Job")
p <- p + opts(title=title)
print(p)
}
#
# Plots scheduling efficiency at job events
#
plotProgressByTime <- function(gatkReport) {
allJobs = allJobsFromReport(gatkReport)
nJobs = dim(allJobs)[1]
allJobs = allJobs[order(allJobs$startTime, decreasing=F),]
allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
times = sort(c(allJobs$relStartTime, allJobs$relDoneTime))
countJobs <- function(p) {
s = allJobs$relStartTime
e = allJobs$relDoneTime
x = c() # I wish I knew how to make this work with apply
for ( time in times )
x = c(x, sum(p(s, e, time)))
x
}
pending = countJobs(function(s, e, t) s > t)
done = countJobs(function(s, e, t) e < t)
running = nJobs - pending - done
d = data.frame(times=times, pending=pending, running=running, done=done)
p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable))
p <- p + facet_grid(variable ~ ., scales="free")
p <- p + geom_line(size=2)
p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
p <- p + opts(title = "Job scheduling")
print(p)
}
#
# Creates tables for each job in this group
#
standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime")
plotGroup <- function(groupTable) {
name = unique(groupTable$analysisName)[1]
groupAnnotations = setdiff(names(groupTable), standardColumns)
sub = groupTable[,c("jobName", groupAnnotations, "runtime")]
sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
# create a table showing each job and all annotations
textplot(sub, show.rownames=F)
title(paste("Job summary for", name, "full itemization"), cex=3)
# create the table for each combination of values in the group, listing iterations in the columns
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}
# print out some useful basic information
print("Report")
print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) {
convertGroup <- function(g) {
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
g
}
lapply(gatkReportData, convertGroup)
}
# read the table
gatkReportData <- gsa.read.gatkreport(inputFileName)
gatkReportData <- convertUnits(gatkReportData)
#print(summary(gatkReportData))
if ( ! is.na(outputPDF) ) {
pdf(outputPDF, height=8.5, width=11)
}
plotJobsGantt(gatkReportData, T)
plotJobsGantt(gatkReportData, F)
plotProgressByTime(gatkReportData)
for ( group in gatkReportData ) {
plotGroup(group)
}
if ( ! is.na(outputPDF) ) {
dev.off()
}

View File

@ -303,7 +303,10 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
uac.alleles = alleles;
if (!bamIsTruth) uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions;
if (deletions >= 0)
uac.MAX_DELETION_FRACTION = deletions;
else
uac.MAX_DELETION_FRACTION = 1.0;
if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
@ -347,12 +350,14 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
}
VariantCallContext call;
if ( vcComp.isSNP() )
if ( vcComp.isSNP() ) {
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
else if ( vcComp.isIndel() ) {
} else if ( vcComp.isIndel() ) {
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
}
else {
} else if ( bamIsTruth ) {
// assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
} else {
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
return counter;
}

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.apache.lucene.messages.NLS;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -409,6 +408,17 @@ public class MathUtils {
return Math.sqrt(rms);
}
public static double rms(Collection<Double> l) {
if (l.size() == 0)
return 0.0;
double rms = 0.0;
for (Double i : l)
rms += i*i;
rms /= l.size();
return Math.sqrt(rms);
}
public static double distanceSquared( final double[] x, final double[] y ) {
double dist = 0.0;
for(int iii = 0; iii < x.length; iii++) {

View File

@ -5,15 +5,11 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.apache.poi.hssf.record.PageBreakRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.PipedOutputStream;
import java.lang.reflect.Array;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.Vector;
/**
@ -256,13 +252,17 @@ public class ClippingOp {
if (start == 0 && stop == read.getReadLength() -1)
return new SAMRecord(read.getHeader());
int newLength = read.getReadLength() - (stop - start + 1);
CigarShift cigarShift = hardClipCigar(read.getCigar(), start, stop);
// the cigar may force a shift left or right (or both) in case we are left with insertions
// starting or ending the read after applying the hard clip on start/stop.
int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
byte [] newBases = new byte[newLength];
byte [] newQuals = new byte[newLength];
int copyStart = (start == 0) ? stop + 1 : 0;
int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength); Cigar newCigar = hardClipCigar(read.getCigar(), start, stop);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
SAMRecord hardClippedRead;
try {
@ -273,19 +273,20 @@ public class ClippingOp {
hardClippedRead.setBaseQualities(newQuals);
hardClippedRead.setReadBases(newBases);
hardClippedRead.setCigar(newCigar);
hardClippedRead.setCigar(cigarShift.cigar);
if (start == 0)
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), newCigar));
hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
return hardClippedRead;
}
@Requires({"!cigar.isEmpty()"})
private Cigar hardClipCigar (Cigar cigar, int start, int stop) {
private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
Cigar newCigar = new Cigar();
int index = 0;
int totalHardClipCount = stop - start + 1;
int alignmentShift = 0; // caused by hard clipping insertions or deletions
// hard clip the beginning of the cigar string
if (start == 0) {
@ -307,15 +308,19 @@ public class ClippingOp {
// we're still clipping or just finished perfectly
if (index + shift == stop + 1) {
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}
// element goes beyond what we need to clip
else if (index + shift > stop + 1) {
int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
}
index += shift;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, shift);
if (index <= stop && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
}
@ -345,6 +350,8 @@ public class ClippingOp {
// element goes beyond our clip starting position
else {
int elementLengthAfterChopping = start - index;
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength() - (start - index));
// if this last element is a HARD CLIP operator, just merge it with our hard clip operator to be added later
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClipCount += elementLengthAfterChopping;
@ -356,9 +363,67 @@ public class ClippingOp {
if (index < start && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
}
newCigar.add(new CigarElement(totalHardClipCount, CigarOperator.HARD_CLIP));
// check if we are hard clipping indels
while(cigarElementIterator.hasNext()) {
cigarElement = cigarElementIterator.next();
alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
}
newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
}
return newCigar;
return cleanHardClippedCigar(newCigar);
}
/**
* Checks if a hard clipped cigar left a read starting or ending with insertions/deletions
* and cleans it up accordingly.
*
* @param cigar
* @return
*/
private CigarShift cleanHardClippedCigar(Cigar cigar) {
Cigar cleanCigar = new Cigar();
int shiftFromStart = 0;
int shiftFromEnd = 0;
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
Stack<CigarElement> inverseCigarStack = new Stack<CigarElement>();
for (CigarElement cigarElement : cigar.getCigarElements())
cigarStack.push(cigarElement);
for (int i = 1; i <= 2; i++) {
int shift = 0;
boolean readHasStarted = false;
while(!cigarStack.empty()) {
CigarElement cigarElement = cigarStack.pop();
if ( !readHasStarted &&
cigarElement.getOperator() != CigarOperator.INSERTION &&
cigarElement.getOperator() != CigarOperator.DELETION &&
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
readHasStarted = true;
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
shift += cigarElement.getLength();
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
if (i==1)
inverseCigarStack.push(cigarElement);
else
cleanCigar.add(cigarElement);
}
}
// first pass (i=1) is from end to start of the cigar elements
if (i == 1) {
shiftFromEnd = shift;
cigarStack = inverseCigarStack;
}
// second pass (i=2) is from start to end with the end already cleaned
else {
shiftFromStart = shift;
}
}
return new CigarShift(cleanCigar, shiftFromStart, shiftFromEnd);
}
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
@ -379,6 +444,34 @@ public class ClippingOp {
else
break;
}
return shift;
}
}
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
int cigarElementLength = cigarElement.getLength();
if (clippedLength >= cigarElementLength)
return -cigarElement.getLength();
else
return -clippedLength;
}
if (cigarElement.getOperator() == CigarOperator.DELETION)
return cigarElement.getLength();
return 0;
}
private class CigarShift {
private Cigar cigar;
private int shiftFromStart;
private int shiftFromEnd;
private CigarShift(Cigar cigar, int shiftFromStart, int shiftFromEnd) {
this.cigar = cigar;
this.shiftFromStart = shiftFromStart;
this.shiftFromEnd = shiftFromEnd;
}
}
}

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broad.tribble.util.PositionalStream;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.jets3t.service.multi.ThreadedStorageService;
import java.util.ArrayList;
import java.util.List;
@ -50,13 +48,26 @@ public class ReadClipper {
return read;
}
public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
return hardClipByReferenceCoordinates(-1, refStop);
}
public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
return hardClipByReferenceCoordinates(refStart, -1);
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
System.out.println("Clipping start/stop: " + start + "/" + stop);
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
}
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
@ -64,10 +75,12 @@ public class ReadClipper {
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
@Requires("left <= right")
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
this.read = hardClipByReferenceCoordinates(-1, left);
this.ops = null; // reset the operations
return hardClipByReferenceCoordinates(right, -1);
if (left == right)
return new SAMRecord(read.getHeader());
this.read = hardClipByReferenceCoordinates(right, -1);
return hardClipByReferenceCoordinates(-1, left);
}
public SAMRecord hardClipLowQualEnds(byte lowQual) {

View File

@ -116,10 +116,26 @@ public class VCFUtils {
return fields;
}
/** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */
private static final class HeaderConflictWarner {
Logger logger;
Set<String> alreadyIssued = new HashSet<String>();
private HeaderConflictWarner(final Logger logger) {
this.logger = logger;
}
public void warn(final VCFHeaderLine line, final String msg) {
if ( logger != null && ! alreadyIssued.contains(line.getKey()) ) {
alreadyIssued.add(line.getKey());
logger.warn(msg);
}
}
}
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, Logger logger) throws IllegalStateException {
HashMap<String, VCFHeaderLine> map = new HashMap<String, VCFHeaderLine>(); // from KEY.NAME -> line
HeaderConflictWarner conflictWarner = new HeaderConflictWarner(logger);
// todo -- needs to remove all version headers from sources and add its own VCF version line
for ( VCFHeader source : headers ) {
@ -152,24 +168,24 @@ public class VCFUtils {
// number, then this value should be 1. However, if the INFO field describes a pair
// of numbers, then this value should be 2 and so on. If the number of possible
// values varies, is unknown, or is unbounded, then this value should be '.'.
if ( logger != null ) logger.warn("Promoting header field Number to . due to number differences in header lines: " + line + " " + other);
conflictWarner.warn(line, "Promoting header field Number to . due to number differences in header lines: " + line + " " + other);
compOther.setNumberToUnbounded();
} else if ( compLine.getType() == VCFHeaderLineType.Integer && compOther.getType() == VCFHeaderLineType.Float ) {
// promote key to Float
if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther);
conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther);
map.put(key, compOther);
} else if ( compLine.getType() == VCFHeaderLineType.Float && compOther.getType() == VCFHeaderLineType.Integer ) {
// promote key to Float
if ( logger != null ) logger.warn("Promoting Integer to Float in header: " + compOther);
conflictWarner.warn(line, "Promoting Integer to Float in header: " + compOther);
} else {
throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
}
}
if ( ! compLine.getDescription().equals(compOther) )
if ( logger != null ) logger.warn("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine);
conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine);
} else {
// we are not equal, but we're not anything special either
if ( logger != null ) logger.warn("Ignoring header line already in map: this header line = " + line + " already present header = " + other);
conflictWarner.warn(line, "Ignoring header line already in map: this header line = " + line + " already present header = " + other);
}
} else {
map.put(key, line);

View File

@ -632,7 +632,7 @@ public class ReadUtils {
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
int start = getRefCoordSoftUnclippedStart(read);
int stop = getRefCoordSoftUnclippedStop(read);
int stop = getRefCoordSoftUnclippedEnd(read);
if ( !read.getReferenceName().equals(interval.getContig()) )
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
@ -658,6 +658,7 @@ public class ReadUtils {
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
int start = read.getUnclippedStart();
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
@ -669,51 +670,98 @@ public class ReadUtils {
return start;
}
public static int getRefCoordSoftUnclippedStop(SAMRecord read) {
int stop = read.getAlignmentEnd();
List<CigarElement> cigarElementList = read.getCigar().getCigarElements();
CigarElement lastCigarElement = cigarElementList.get(cigarElementList.size()-1);
if (lastCigarElement.getOperator() == CigarOperator.SOFT_CLIP)
stop += lastCigarElement.getLength();
return stop;
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
int stop = read.getUnclippedStart();
int shift = 0;
CigarOperator lastOperator = null;
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
stop += shift;
lastOperator = cigarElement.getOperator();
if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP || cigarElement.getOperator() == CigarOperator.HARD_CLIP)
shift = cigarElement.getLength();
else
shift = 0;
}
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
* the alignment start of the read.
*
* @param read
* @param refCoord
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord < read.getAlignmentStart()"})
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(SAMRecord read, int refCoord) {
if (getRefCoordSoftUnclippedStart(read) <= refCoord)
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
return -1;
}
/**
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region after
* the alignment end of the read.
*
* @param read
* @param refCoord
* @return the corresponding read coordinate or -1 if it failed to find it (it has been hard clipped before)
*/
@Requires({"refCoord <= read.getUnclippedEnd()", "refCoord > read.getAlignmentEnd()"})
private static int getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(SAMRecord read, int refCoord) {
if (getRefCoordSoftUnclippedEnd(read) >= refCoord)
return refCoord - getRefCoordSoftUnclippedStart(read) + 1;
return -1;
}
@Requires({"refCoord >= read.getAlignmentStart()", "refCoord <= read.getAlignmentEnd()"})
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
while (!goalReached && cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
int shift = 0;
//if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
// goal -= cigarElement.getLength();
//}
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
shift = cigarElement.getLength();
}
else {
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
if (refCoord < read.getAlignmentStart()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
if (readBases < 0)
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
}
else if (refCoord > read.getAlignmentEnd()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentEnd(read, refCoord);
if (readBases < 0)
throw new ReviewedStingException("Requested a coordinate in a hard clipped area of the read. No equivalent read coordinate.");
}
else {
int goal = refCoord - read.getAlignmentStart(); // The goal is to move this many reference bases
boolean goalReached = refBases == goal;
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
while (!goalReached && cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
int shift = 0;
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
shift = cigarElement.getLength();
}
else {
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
}
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
}
return readBases;
}

View File

@ -40,6 +40,8 @@ import java.util.EnumSet;
* Test suite for the parsing engine.
*/
public class ParsingEngineUnitTest extends BaseTest {
/** we absolutely cannot have this file existing, or we'll fail the UnitTest */
private final static String NON_EXISTANT_FILENAME_VCF = "this_file_should_not_exist_on_disk_123456789.vcf";
private ParsingEngine parsingEngine;
@BeforeMethod
@ -636,7 +638,7 @@ public class ParsingEngineUnitTest extends BaseTest {
@Test
public void basicRodBindingArgumentTest() {
final String[] commandLine = new String[] {"-V:vcf","foo.vcf"};
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF};
parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -646,7 +648,7 @@ public class ParsingEngineUnitTest extends BaseTest {
parsingEngine.loadArgumentsIntoObject( argProvider );
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value");
Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value");
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
@ -659,7 +661,7 @@ public class ParsingEngineUnitTest extends BaseTest {
@Test
public void shortNameOnlyRodBindingArgumentTest() {
final String[] commandLine = new String[] {"-short:vcf","foo.vcf"};
final String[] commandLine = new String[] {"-short:vcf",NON_EXISTANT_FILENAME_VCF};
parsingEngine.addArgumentSource( ShortNameOnlyRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -669,7 +671,7 @@ public class ParsingEngineUnitTest extends BaseTest {
parsingEngine.loadArgumentsIntoObject( argProvider );
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value");
Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value");
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
@ -711,7 +713,7 @@ public class ParsingEngineUnitTest extends BaseTest {
@Test(expectedExceptions = UserException.class)
public void rodBindingArgumentTestMissingType() {
final String[] commandLine = new String[] {"-V","foo.vcf"};
final String[] commandLine = new String[] {"-V",NON_EXISTANT_FILENAME_VCF};
parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -723,7 +725,7 @@ public class ParsingEngineUnitTest extends BaseTest {
@Test(expectedExceptions = UserException.class)
public void rodBindingArgumentTestTooManyTags() {
final String[] commandLine = new String[] {"-V:x,y,z","foo.vcf"};
final String[] commandLine = new String[] {"-V:x,y,z",NON_EXISTANT_FILENAME_VCF};
parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -740,7 +742,7 @@ public class ParsingEngineUnitTest extends BaseTest {
@Test
public void variantContextBindingArgumentTest() {
final String[] commandLine = new String[] {"-V:vcf","foo.vcf"};
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF};
parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -750,14 +752,14 @@ public class ParsingEngineUnitTest extends BaseTest {
parsingEngine.loadArgumentsIntoObject( argProvider );
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
}
@Test
public void variantContextBindingArgumentTestVCF3() {
final String[] commandLine = new String[] {"-V:vcf3","foo.vcf"};
final String[] commandLine = new String[] {"-V:vcf3",NON_EXISTANT_FILENAME_VCF};
parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -767,7 +769,7 @@ public class ParsingEngineUnitTest extends BaseTest {
parsingEngine.loadArgumentsIntoObject( argProvider );
Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly");
Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value");
Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
}
@ -779,7 +781,7 @@ public class ParsingEngineUnitTest extends BaseTest {
@Test
public void listRodBindingArgumentTest() {
final String[] commandLine = new String[] {"-V:vcf","foo.vcf"};
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF};
parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -791,14 +793,14 @@ public class ParsingEngineUnitTest extends BaseTest {
Assert.assertEquals(argProvider.bindings.size(), 1, "Unexpected number of bindings");
RodBinding<Feature> binding = argProvider.bindings.get(0);
Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly");
Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value");
Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
}
@Test
public void listRodBindingArgumentTest2Args() {
final String[] commandLine = new String[] {"-V:vcf","foo.vcf", "-V:vcf", "bar.vcf"};
final String[] commandLine = new String[] {"-V:vcf",NON_EXISTANT_FILENAME_VCF, "-V:vcf", "bar.vcf"};
parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
parsingEngine.parse( commandLine );
@ -811,7 +813,7 @@ public class ParsingEngineUnitTest extends BaseTest {
RodBinding<Feature> binding = argProvider.bindings.get(0);
Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly");
Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value");
Assert.assertEquals(binding.getSource(), NON_EXISTANT_FILENAME_VCF, "Source isn't set to its expected value");
Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value");
Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set");
@ -838,7 +840,7 @@ public class ParsingEngineUnitTest extends BaseTest {
@Test
public void listRodBindingArgumentTestExplicitlyNamed() {
final String[] commandLine = new String[] {"-V:foo,vcf","foo.vcf", "-V:foo,vcf", "bar.vcf"};
final String[] commandLine = new String[] {"-V:foo,vcf",NON_EXISTANT_FILENAME_VCF, "-V:foo,vcf", "bar.vcf"};
parsingEngine.addArgumentSource( ListRodBindingArgProvider.class );
parsingEngine.parse( commandLine );

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="edu.mit.broad" module="picard-private-parts" revision="1959" status="integration" publication="20110718185300" />
<info organisation="edu.mit.broad" module="picard-private-parts" revision="2034" status="integration" publication="20110718185300" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.49.895" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="picard" revision="1.52.944" status="release" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.49.895" status="release" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf" module="sam" revision="1.52.944" status="release" />
</ivy-module>

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="18" status="integration" />
<info organisation="org.broad" module="tribble" revision="21" status="integration" />
</ivy-module>