Input
*
diff --git a/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java b/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
index 480113e1e..830c6590d 100644
--- a/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
+++ b/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
@@ -210,13 +210,23 @@ public class JnaSession implements Session {
}
public static void setAttribute(Pointer jt, String name, String value) throws DrmaaException {
- checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+ if (getAttrNames().contains(name)) {
+ checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+ }
+ else {
+            throw new UnsupportedAttributeException("Attribute " + name + " is not supported by this implementation of DRMAA");
+ }
}
public static String getAttribute(Pointer jt, String name) throws DrmaaException {
- Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
- checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
- return attrBuffer.getString(0);
+ if (getAttrNames().contains(name)) {
+ Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
+ checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+ return attrBuffer.getString(0);
+ }
+ else {
+            throw new UnsupportedAttributeException("Attribute " + name + " is not supported by this implementation of DRMAA");
+ }
}
public static void setVectorAttribute(Pointer jt, String name, Collection values) throws DrmaaException {
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
index 4d2c26a79..ec82cdef2 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
@@ -495,4 +495,14 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome
public long sizeOfOverlap( final GenomeLoc that ) {
return ( this.overlapsP(that) ? Math.min( getStop(), that.getStop() ) - Math.max( getStart(), that.getStart() ) + 1L : 0L );
}
+
+ /**
+ * Returns the maximum GenomeLoc of this and other
+ * @param other another non-null genome loc
+ * @return the max of this and other
+ */
+ public GenomeLoc max(final GenomeLoc other) {
+ final int cmp = this.compareTo(other);
+        return cmp < 0 ? other : this;
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
index a3ffe708c..bf60b4a80 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
@@ -374,7 +374,7 @@ public final class GenomeLocParser {
int start = 1;
int stop = -1;
- final int colonIndex = str.indexOf(":");
+ final int colonIndex = str.lastIndexOf(":");
if(colonIndex == -1) {
contig = str.substring(0, str.length()); // chr1
stop = Integer.MAX_VALUE;
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index 30fdce75d..4c708f2bf 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -41,8 +41,6 @@ public class Haplotype {
protected final byte[] bases;
protected final double[] quals;
private GenomeLoc genomeLocation = null;
- private HashMap readLikelihoodsPerSample = null;
- private HashMap readCountsPerSample = null;
private HashMap eventMap = null;
private boolean isRef = false;
private Cigar cigar;
@@ -94,31 +92,6 @@ public class Haplotype {
return Arrays.hashCode(bases);
}
- public void addReadLikelihoods( final String sample, final double[] readLikelihoods, final int[] readCounts ) {
- if( readLikelihoodsPerSample == null ) {
- readLikelihoodsPerSample = new HashMap();
- }
- readLikelihoodsPerSample.put(sample, readLikelihoods);
- if( readCountsPerSample == null ) {
- readCountsPerSample = new HashMap();
- }
- readCountsPerSample.put(sample, readCounts);
- }
-
- @Ensures({"result != null"})
- public double[] getReadLikelihoods( final String sample ) {
- return readLikelihoodsPerSample.get(sample);
- }
-
- @Ensures({"result != null"})
- public int[] getReadCounts( final String sample ) {
- return readCountsPerSample.get(sample);
- }
-
- public Set getSampleKeySet() {
- return readLikelihoodsPerSample.keySet();
- }
-
public HashMap getEventMap() {
return eventMap;
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
index 1242e5b00..861f172d9 100755
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@@ -9,12 +9,12 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
- public final static byte MAX_RECALIBRATED_Q_SCORE = 93;
+ public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
public final static double MIN_REASONABLE_ERROR = 0.0001;
- public final static byte MAX_REASONABLE_Q_SCORE = 40;
+ public final static byte MAX_REASONABLE_Q_SCORE = 60; // quals above this value are extremely suspicious
public final static byte MIN_USABLE_Q_SCORE = 6;
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java
new file mode 100644
index 000000000..00e491eb0
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java
@@ -0,0 +1,16 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: thibault
+ * Date: 11/26/12
+ * Time: 2:35 PM
+ *
+ * Describes how a read relates to an assigned ActiveRegion
+ */
+public enum ActiveRegionReadState {
+ PRIMARY, // This is the read's primary region
+ NONPRIMARY, // This region overlaps the read, but it is not primary
+ EXTENDED, // This region would overlap the read if it were extended
+ UNMAPPED // This read is not mapped
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
index cf4d699ee..3966434c0 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@@ -406,10 +406,15 @@ public class BAQ {
// so BQi = Qi - BAQi + 64
byte[] bqTag = new byte[baq.length];
for ( int i = 0; i < bqTag.length; i++) {
- int bq = (int)read.getBaseQualities()[i] + 64;
- int baq_i = (int)baq[i];
- int tag = bq - baq_i;
- if ( tag < 0 ) throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
+ final int bq = (int)read.getBaseQualities()[i] + 64;
+ final int baq_i = (int)baq[i];
+ final int tag = bq - baq_i;
+ // problem with the calculation of the correction factor; this is our problem
+ if ( tag < 0 )
+ throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
+ // the original quality is too high, almost certainly due to using the wrong encoding in the BAM file
+ if ( tag > Byte.MAX_VALUE )
+ throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score (" + (int)read.getBaseQualities()[i] + ") with BAQ correction factor of " + baq_i);
bqTag[i] = (byte)tag;
}
return new String(bqTag);
diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
index a49a12292..523fd5a97 100755
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -239,6 +240,16 @@ public class UserException extends ReviewedStingException {
}
}
+ public static class MisencodedBAM extends UserException {
+ public MisencodedBAM(SAMRecord read, String message) {
+ this(read.getFileSource() != null ? read.getFileSource().getReader().toString() : "(none)", message);
+ }
+
+ public MisencodedBAM(String source, String message) {
+ super(String.format("SAM/BAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
+ }
+ }
+
public static class MalformedVCF extends UserException {
public MalformedVCF(String message, String line) {
super(String.format("The provided VCF file is malformed at line %s: %s", line, message));
@@ -267,7 +278,7 @@ public class UserException extends ReviewedStingException {
public static class ReadMissingReadGroup extends MalformedBAM {
public ReadMissingReadGroup(SAMRecord read) {
- super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
+            super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpUtils.forumPost("discussion/59/companion-utilities-replacereadgroups") + " to fix this problem", read.getReadName()));
}
}
@@ -343,7 +354,7 @@ public class UserException extends ReviewedStingException {
super(String.format("Lexicographically sorted human genome sequence detected in %s."
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
- + "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
+ + "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.forumPost("discussion/58/companion-utilities-reordersam")
+ "\n %s contigs = %s",
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
index 22d249240..9bb0e646f 100644
--- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
+++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
@@ -38,10 +38,10 @@ import java.util.*;
public abstract class PerReadAlleleLikelihoodMap {
- public static final double INDEL_LIKELIHOOD_THRESH = 0.1;
+ public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.1;
protected List alleles;
- protected Map> likelihoodReadMap;
+ protected Map> likelihoodReadMap;
public abstract void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log);
public abstract ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log);
@@ -68,7 +68,7 @@ public abstract class PerReadAlleleLikelihoodMap {
}
public void add(PileupElement p, Allele a, Double likelihood) {
- add(p.getRead(),a,likelihood);
+ add(p.getRead(), a, likelihood);
}
public boolean containsPileupElement(PileupElement p) {
@@ -120,7 +120,7 @@ public abstract class PerReadAlleleLikelihoodMap {
prevMaxLike = el.getValue();
}
}
- return (maxLike - prevMaxLike > INDEL_LIKELIHOOD_THRESH ? mostLikelyAllele : Allele.NO_CALL );
+ return (maxLike - prevMaxLike > INFORMATIVE_LIKELIHOOD_THRESHOLD ? mostLikelyAllele : Allele.NO_CALL );
}
public static PerReadAlleleLikelihoodMap getBestAvailablePerReadAlleleLikelihoodMap() {
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
index fe5f48a48..64238dc73 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
@@ -44,14 +44,13 @@ public class ForumAPIUtils {
/**
* How we post to the forum
*/
- private final static String API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
final private static String ACCESS_TOKEN = "access_token=";
public static List getPostedTools(String forumKey) {
Gson gson = new Gson();
List output = new ArrayList();
- String text = httpGet(API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
+ String text = httpGet(HelpUtils.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
APIQuery details = gson.fromJson(text, APIQuery.class);
ForumDiscussion[] discussions = details.Discussions;
@@ -159,7 +158,7 @@ public class ForumAPIUtils {
Gson gson = new Gson();
String data = gson.toJson(post.getPostData());
- httpPost(data, API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
+ httpPost(data, HelpUtils.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
}
@@ -167,8 +166,7 @@ public class ForumAPIUtils {
class APIQuery {
ForumDiscussion[] Discussions;
- public APIQuery() {
- }
+ public APIQuery() {}
}
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
index 4ec2ac6d7..21054a794 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
@@ -28,7 +28,7 @@ public class GATKDocUtils {
/**
* The URL root for RELEASED GATKDOC units
*/
- public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gatk/gatkdocs/";
+ public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpUtils.GATK_DOCS_URL;
/**
* The URL root for STABLE GATKDOC units
*/
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
index 645ab34c1..930bbc996 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
@@ -32,6 +32,16 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
import java.lang.reflect.Field;
public class HelpUtils {
+
+ public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
+ public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
+ public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
+ public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
+
+ public static String forumPost(String post) {
+ return GATK_FORUM_URL + post;
+ }
+
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
try {
Class type = getClassForDoc(classDoc);
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
index bd99a9266..0e0237412 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
-import org.broadinstitute.sting.utils.SimpleTimer;
import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
@@ -19,11 +18,6 @@ class InputProducer implements Runnable {
*/
final Iterator inputReader;
- /**
- * Our timer (may be null) that we use to track our input costs
- */
- final SimpleTimer inputTimer;
-
/**
* Where we put our input values for consumption
*/
@@ -51,16 +45,13 @@ class InputProducer implements Runnable {
public InputProducer(final Iterator inputReader,
final MultiThreadedErrorTracker errorTracker,
- final SimpleTimer inputTimer,
final BlockingQueue outputQueue) {
if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
if ( errorTracker == null ) throw new IllegalArgumentException("errorTracker cannot be null");
- if ( inputTimer == null ) throw new IllegalArgumentException("inputTimer cannot be null");
if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null");
this.inputReader = inputReader;
this.errorTracker = errorTracker;
- this.inputTimer = inputTimer;
this.outputQueue = outputQueue;
}
@@ -94,16 +85,15 @@ class InputProducer implements Runnable {
* @throws InterruptedException
*/
private synchronized InputType readNextItem() throws InterruptedException {
- inputTimer.restart();
if ( ! inputReader.hasNext() ) {
// we are done, mark ourselves as such and return null
readLastValue = true;
- inputTimer.stop();
return null;
} else {
// get the next value, and return it
final InputType input = inputReader.next();
- inputTimer.stop();
+ if ( input == null )
+ throw new IllegalStateException("inputReader.next() returned a null value, breaking our contract");
nRead++;
return input;
}
@@ -121,6 +111,9 @@ class InputProducer implements Runnable {
final InputType value = readNextItem();
if ( value == null ) {
+ if ( ! readLastValue )
+ throw new IllegalStateException("value == null but readLastValue is false!");
+
// add the EOF object so our consumer knows we are done in all inputs
// note that we do not increase inputID here, so that variable indicates the ID
// of the last real value read from the queue
@@ -133,8 +126,10 @@ class InputProducer implements Runnable {
}
latch.countDown();
- } catch (Exception ex) {
+ } catch (Throwable ex) {
errorTracker.notifyOfError(ex);
+ } finally {
+// logger.info("Exiting input thread readLastValue = " + readLastValue);
}
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSRuntimeProfile.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSRuntimeProfile.java
deleted file mode 100644
index 0926b4c50..000000000
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSRuntimeProfile.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.broadinstitute.sting.utils.nanoScheduler;
-
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.AutoFormattingTime;
-import org.broadinstitute.sting.utils.SimpleTimer;
-
-/**
- * Holds runtime profile (input, read, map) times as tracked by NanoScheduler
- *
- * User: depristo
- * Date: 9/10/12
- * Time: 8:31 PM
- */
-public class NSRuntimeProfile {
- final SimpleTimer outsideSchedulerTimer = new SimpleTimer("outside");
- final SimpleTimer inputTimer = new SimpleTimer("input");
- final SimpleTimer mapTimer = new SimpleTimer("map");
- final SimpleTimer reduceTimer = new SimpleTimer("reduce");
-
- /**
- * Combine the elapsed time information from other with this profile
- *
- * @param other a non-null profile
- */
- public void combine(final NSRuntimeProfile other) {
- outsideSchedulerTimer.addElapsed(other.outsideSchedulerTimer);
- inputTimer.addElapsed(other.inputTimer);
- mapTimer.addElapsed(other.mapTimer);
- reduceTimer.addElapsed(other.reduceTimer);
- }
-
- /**
- * Print the runtime profiling to logger
- *
- * @param logger
- */
- public void log(final Logger logger) {
- log1(logger, "Input time", inputTimer);
- log1(logger, "Map time", mapTimer);
- log1(logger, "Reduce time", reduceTimer);
- log1(logger, "Outside time", outsideSchedulerTimer);
- }
-
- /**
- * @return the total runtime for all functions of this nano scheduler
- */
- //@Ensures("result >= 0.0")
- public double totalRuntimeInSeconds() {
- return inputTimer.getElapsedTime()
- + mapTimer.getElapsedTime()
- + reduceTimer.getElapsedTime()
- + outsideSchedulerTimer.getElapsedTime();
- }
-
- /**
- * Print to logger.info timing information from timer, with name label
- *
- * @param label the name of the timer to display. Should be human readable
- * @param timer the timer whose elapsed time we will display
- */
- //@Requires({"label != null", "timer != null"})
- private void log1(final Logger logger, final String label, final SimpleTimer timer) {
- final double myTimeInSec = timer.getElapsedTime();
- final double myTimePercent = myTimeInSec / totalRuntimeInSeconds() * 100;
- logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent));
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index d83a23c0f..4cc91faa4 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -57,16 +57,6 @@ public class NanoScheduler {
boolean debug = false;
private NSProgressFunction progressFunction = null;
- /**
- * Tracks the combined runtime profiles across all created nano schedulers
- */
- final static private NSRuntimeProfile combinedNSRuntimeProfiler = new NSRuntimeProfile();
-
- /**
- * The profile specific to this nano scheduler
- */
- final private NSRuntimeProfile myNSRuntimeProfile = new NSRuntimeProfile();
-
/**
* Create a new nanoscheduler with the desire characteristics requested by the argument
*
@@ -94,9 +84,6 @@ public class NanoScheduler {
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
this.masterExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-master-thread-%d"));
}
-
- // start timing the time spent outside of the nanoScheduler
- myNSRuntimeProfile.outsideSchedulerTimer.start();
}
/**
@@ -123,11 +110,6 @@ public class NanoScheduler {
* After this call, execute cannot be invoked without throwing an error
*/
public void shutdown() {
- myNSRuntimeProfile.outsideSchedulerTimer.stop();
-
- // add my timing information to the combined NS runtime profile
- combinedNSRuntimeProfiler.combine(myNSRuntimeProfile);
-
if ( nThreads > 1 ) {
shutdownExecutor("inputExecutor", inputExecutor);
shutdownExecutor("mapExecutor", mapExecutor);
@@ -137,19 +119,6 @@ public class NanoScheduler {
shutdown = true;
}
- public void printRuntimeProfile() {
- myNSRuntimeProfile.log(logger);
- }
-
- public static void printCombinedRuntimeProfile() {
- if ( combinedNSRuntimeProfiler.totalRuntimeInSeconds() > 0.1 )
- combinedNSRuntimeProfiler.log(logger);
- }
-
- protected double getTotalRuntime() {
- return myNSRuntimeProfile.totalRuntimeInSeconds();
- }
-
/**
* Helper function to cleanly shutdown an execution service, checking that the execution
* state is clean when it's done.
@@ -245,8 +214,6 @@ public class NanoScheduler {
if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
- myNSRuntimeProfile.outsideSchedulerTimer.stop();
-
ReduceType result;
if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
result = executeSingleThreaded(inputReader, map, initialValue, reduce);
@@ -254,7 +221,6 @@ public class NanoScheduler {
result = executeMultiThreaded(inputReader, map, initialValue, reduce);
}
- myNSRuntimeProfile.outsideSchedulerTimer.restart();
return result;
}
@@ -273,28 +239,19 @@ public class NanoScheduler {
while ( true ) {
// start timer to ensure that both hasNext and next are caught by the timer
- myNSRuntimeProfile.inputTimer.restart();
if ( ! inputReader.hasNext() ) {
- myNSRuntimeProfile.inputTimer.stop();
break;
} else {
final InputType input = inputReader.next();
- myNSRuntimeProfile.inputTimer.stop();
// map
- myNSRuntimeProfile.mapTimer.restart();
- final long preMapTime = LOG_MAP_TIMES ? 0 : myNSRuntimeProfile.mapTimer.currentTimeNano();
final MapType mapValue = map.apply(input);
- if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (myNSRuntimeProfile.mapTimer.currentTimeNano() - preMapTime));
- myNSRuntimeProfile.mapTimer.stop();
- if ( i++ % this.bufferSize == 0 && progressFunction != null )
+ if ( progressFunction != null )
progressFunction.progress(input);
// reduce
- myNSRuntimeProfile.reduceTimer.restart();
sum = reduce.apply(mapValue, sum);
- myNSRuntimeProfile.reduceTimer.stop();
}
}
@@ -320,6 +277,7 @@ public class NanoScheduler {
while ( true ) {
// check that no errors occurred while we were waiting
handleErrors();
+// checkForDeadlocks();
try {
final ReduceType result = reduceResult.get(100, TimeUnit.MILLISECONDS);
@@ -341,6 +299,26 @@ public class NanoScheduler {
}
}
+// private void checkForDeadlocks() {
+// if ( deadLockCheckCounter++ % 100 == 0 ) {
+// logger.info("Checking for deadlocks...");
+// final ThreadMXBean bean = ManagementFactory.getThreadMXBean();
+// final long[] threadIds = bean.findDeadlockedThreads(); // Returns null if no threads are deadlocked.
+//
+// if (threadIds != null) {
+// final ThreadInfo[] infos = bean.getThreadInfo(threadIds);
+//
+// logger.error("!!! Deadlock detected !!!!");
+// for (final ThreadInfo info : infos) {
+// logger.error("Thread " + info);
+// for ( final StackTraceElement elt : info.getStackTrace() ) {
+// logger.error("\t" + elt.toString());
+// }
+// }
+// }
+// }
+// }
+
private void handleErrors() {
if ( errorTracker.hasAnErrorOccurred() ) {
masterExecutor.shutdownNow();
@@ -380,7 +358,7 @@ public class NanoScheduler {
// Create the input producer and start it running
final InputProducer inputProducer =
- new InputProducer(inputReader, errorTracker, myNSRuntimeProfile.inputTimer, inputQueue);
+ new InputProducer(inputReader, errorTracker, inputQueue);
inputExecutor.submit(inputProducer);
// a priority queue that stores up to bufferSize elements
@@ -389,7 +367,7 @@ public class NanoScheduler {
new PriorityBlockingQueue>();
final Reducer reducer
- = new Reducer(reduce, errorTracker, myNSRuntimeProfile.reduceTimer, initialValue);
+ = new Reducer(reduce, errorTracker, initialValue);
try {
int nSubmittedJobs = 0;
@@ -408,7 +386,8 @@ public class NanoScheduler {
// wait for all of the input and map threads to finish
return waitForCompletion(inputProducer, reducer);
- } catch (Exception ex) {
+ } catch (Throwable ex) {
+// logger.warn("Reduce job got exception " + ex);
errorTracker.notifyOfError(ex);
return initialValue;
}
@@ -486,16 +465,12 @@ public class NanoScheduler {
final InputType input = inputWrapper.getValue();
// map
- myNSRuntimeProfile.mapTimer.restart();
- final long preMapTime = LOG_MAP_TIMES ? 0 : myNSRuntimeProfile.mapTimer.currentTimeNano();
final MapType mapValue = map.apply(input);
- if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (myNSRuntimeProfile.mapTimer.currentTimeNano() - preMapTime));
- myNSRuntimeProfile.mapTimer.stop();
// enqueue the result into the mapResultQueue
result = new MapResult(mapValue, jobID);
- if ( jobID % bufferSize == 0 && progressFunction != null )
+ if ( progressFunction != null )
progressFunction.progress(input);
} else {
// push back the EOF marker so other waiting threads can read it
@@ -508,7 +483,8 @@ public class NanoScheduler {
mapResultQueue.put(result);
final int nReduced = reducer.reduceAsMuchAsPossible(mapResultQueue);
- } catch (Exception ex) {
+ } catch (Throwable ex) {
+// logger.warn("Map job got exception " + ex);
errorTracker.notifyOfError(ex);
} finally {
// we finished a map job, release the job queue semaphore
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java
index 92c1018eb..5cae28187 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java
@@ -4,7 +4,6 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
-import org.broadinstitute.sting.utils.SimpleTimer;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.PriorityBlockingQueue;
@@ -34,7 +33,6 @@ class Reducer {
final CountDownLatch countDownLatch = new CountDownLatch(1);
final NSReduceFunction reduce;
- final SimpleTimer reduceTimer;
final MultiThreadedErrorTracker errorTracker;
/**
@@ -61,20 +59,16 @@ class Reducer {
* reduceTimer
*
* @param reduce the reduce function to apply
- * @param reduceTimer the timer to time the reduce function call
* @param initialSum the initial reduce sum
*/
public Reducer(final NSReduceFunction reduce,
final MultiThreadedErrorTracker errorTracker,
- final SimpleTimer reduceTimer,
final ReduceType initialSum) {
if ( errorTracker == null ) throw new IllegalArgumentException("Error tracker cannot be null");
if ( reduce == null ) throw new IllegalArgumentException("Reduce function cannot be null");
- if ( reduceTimer == null ) throw new IllegalArgumentException("reduceTimer cannot be null");
this.errorTracker = errorTracker;
this.reduce = reduce;
- this.reduceTimer = reduceTimer;
this.sum = initialSum;
}
@@ -125,10 +119,7 @@ class Reducer {
nReducesNow++;
// apply reduce, keeping track of sum
- reduceTimer.restart();
sum = reduce.apply(result.getValue(), sum);
- reduceTimer.stop();
-
}
numJobsReduced++;
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index 42938d2a6..ff274499b 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -652,23 +652,19 @@ public abstract class AbstractReadBackedPileup tracker = (PerSamplePileupElementTracker) pileupElementTracker;
PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
- int current = 0;
for (final String sample : tracker.getSamples()) {
PileupElementTracker perSampleElements = tracker.getElements(sample);
- List filteredPileup = new ArrayList();
- for (PileupElement p : perSampleElements) {
+ int current = 0;
+ UnifiedPileupElementTracker filteredPileup = new UnifiedPileupElementTracker();
+ for (PE p : perSampleElements) {
if (positions.contains(current))
filteredPileup.add(p);
- }
+ current++;
- if (!filteredPileup.isEmpty()) {
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements);
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
}
-
- current++;
+ filteredTracker.addElements(sample, filteredPileup);
}
return (RBP) createNewPileup(loc, filteredTracker);
@@ -1058,6 +1054,11 @@ public abstract class AbstractReadBackedPileup toFragments() {
return FragmentUtils.create(this);
}
+
+ @Override
+ public ReadBackedPileup copy() {
+ return new ReadBackedPileupImpl(loc, (PileupElementTracker) pileupElementTracker.copy());
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
index 09b907e00..6eecaf402 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
@@ -34,11 +34,20 @@ import java.util.*;
*/
abstract class PileupElementTracker implements Iterable {
public abstract int size();
+ public abstract PileupElementTracker copy();
}
class UnifiedPileupElementTracker extends PileupElementTracker {
private final List pileup;
+ @Override
+ public UnifiedPileupElementTracker copy() {
+ UnifiedPileupElementTracker result = new UnifiedPileupElementTracker();
+ for(PE element : pileup)
+ result.add(element);
+ return result;
+ }
+
public UnifiedPileupElementTracker() { pileup = new LinkedList(); }
public UnifiedPileupElementTracker(List pileup) { this.pileup = pileup; }
@@ -65,6 +74,14 @@ class PerSamplePileupElementTracker extends PileupElem
pileup = new HashMap>();
}
+ public PerSamplePileupElementTracker copy() {
+ PerSamplePileupElementTracker result = new PerSamplePileupElementTracker();
+ for (Map.Entry> entry : pileup.entrySet())
+ result.addElements(entry.getKey(), entry.getValue());
+
+ return result;
+ }
+
/**
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
index be61bad99..b9e9b9a52 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
@@ -283,4 +283,12 @@ public interface ReadBackedPileup extends Iterable, HasGenomeLoca
* @return
*/
public FragmentCollection toFragments();
+
+ /**
+ * Creates a full copy (not shallow) of the ReadBackedPileup
+ *
+ * @return
+ */
+ public ReadBackedPileup copy();
+
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java
index a8715e242..b69283b9d 100755
--- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java
@@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.progressmeter;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
+import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -143,6 +144,12 @@ public class ProgressMeter {
/** We use the SimpleTimer to time our run */
private final SimpleTimer timer = new SimpleTimer();
+ private GenomeLoc maxGenomeLoc = null;
+ private String positionMessage = "starting";
+ private long nTotalRecordsProcessed = 0;
+
+ final ProgressMeterDaemon progressMeterDaemon;
+
/**
* Create a new ProgressMeter
*
@@ -177,21 +184,15 @@ public class ProgressMeter {
targetSizeInBP = processingIntervals.coveredSize();
// start up the timer
+ progressMeterDaemon = new ProgressMeterDaemon(this);
start();
}
/**
- * Forward request to notifyOfProgress
- *
- * Assumes that one cycle has been completed
- *
- * @param loc our current location. Null means "in unmapped reads"
- * @param nTotalRecordsProcessed the total number of records we've processed
+ * Start up the progress meter, printing initialization message and starting up the
+ * daemon thread for periodic printing.
*/
- public void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
- notifyOfProgress(loc, false, nTotalRecordsProcessed);
- }
-
+ @Requires("progressMeterDaemon != null")
private synchronized void start() {
timer.start();
lastProgressPrintTime = timer.currentTime();
@@ -199,6 +200,8 @@ public class ProgressMeter {
logger.info("[INITIALIZATION COMPLETE; STARTING PROCESSING]");
logger.info(String.format("%15s processed.%s runtime per.1M.%s completed total.runtime remaining",
"Location", processingUnitName, processingUnitName));
+
+ progressMeterDaemon.start();
}
/**
@@ -216,19 +219,41 @@ public class ProgressMeter {
* Synchronized to ensure that even with multiple threads calling notifyOfProgress we still
* get one clean stream of meter logs.
*
+ * Note this thread doesn't actually print progress, unless must print is true, but just registers
+ * the progress itself. A separate printing daemon periodically polls the meter to print out
+ * progress
+ *
* @param loc Current location, can be null if you are at the end of the processing unit
- * @param mustPrint If true, will print out info, regardless of time interval
* @param nTotalRecordsProcessed the total number of records we've processed
*/
- private synchronized void notifyOfProgress(final GenomeLoc loc, boolean mustPrint, final long nTotalRecordsProcessed) {
+ public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
+ // weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup)
+ this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
+ this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);
+
+ // a pretty name for our position
+ this.positionMessage = maxGenomeLoc == null
+ ? "unmapped reads"
+ : String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart());
+ }
+
+ /**
+ * Actually try to print out progress
+ *
+ * This function may print out if the progress print is due, but if not enough time has elapsed
+ * since the last print we will not print out information.
+ *
+ * @param mustPrint if true, progress will be printed regardless of the last time we printed progress
+ */
+ protected synchronized void printProgress(final boolean mustPrint) {
final long curTime = timer.currentTime();
final boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, progressPrintFrequency);
final boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);
if ( printProgress || printLog ) {
- final ProgressMeterData progressData = takeProgressSnapshot(loc, nTotalRecordsProcessed);
+ final ProgressMeterData progressData = takeProgressSnapshot(maxGenomeLoc, nTotalRecordsProcessed);
final AutoFormattingTime elapsed = new AutoFormattingTime(progressData.getElapsedSeconds());
final AutoFormattingTime bpRate = new AutoFormattingTime(progressData.secondsPerMillionBP());
@@ -241,13 +266,8 @@ public class ProgressMeter {
lastProgressPrintTime = curTime;
updateLoggerPrintFrequency(estTotalRuntime.getTimeInSeconds());
- // a pretty name for our position
- final String posName = loc == null
- ? (mustPrint ? "done" : "unmapped reads")
- : String.format("%s:%d", loc.getContig(), loc.getStart());
-
logger.info(String.format("%15s %5.2e %s %s %5.1f%% %s %s",
- posName, progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
+ positionMessage, progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
}
@@ -296,13 +316,18 @@ public class ProgressMeter {
*/
public void notifyDone(final long nTotalRecordsProcessed) {
// print out the progress meter
- notifyOfProgress(null, true, nTotalRecordsProcessed);
+ this.nTotalRecordsProcessed = nTotalRecordsProcessed;
+ this.positionMessage = "done";
+ printProgress(true);
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours",
timer.getElapsedTime(), timer.getElapsedTime() / 60, timer.getElapsedTime() / 3600));
if ( performanceLog != null )
performanceLog.close();
+
+ // shutdown our daemon thread
+ progressMeterDaemon.done();
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java
new file mode 100644
index 000000000..16887400a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java
@@ -0,0 +1,64 @@
+package org.broadinstitute.sting.utils.progressmeter;
+
+/**
+ * Daemon thread that periodically prints the progress of the progress meter
+ *
+ * User: depristo
+ * Date: 12/4/12
+ * Time: 9:16 PM
+ */
+public final class ProgressMeterDaemon extends Thread {
+    /**
+     * How frequently should we poll and print progress?
+     */
+    private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
+
+    /**
+     * Are we to continue periodically printing status, or should we shut down?
+     * volatile so that a done() call from another thread is seen promptly by run().
+     */
+    private volatile boolean done = false;
+
+    /**
+     * The meter we will call print on
+     */
+    private final ProgressMeter meter;
+
+    /**
+     * Create a new ProgressMeterDaemon printing progress for meter
+     * @param meter the progress meter to print progress of
+     */
+    public ProgressMeterDaemon(final ProgressMeter meter) {
+        if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
+        this.meter = meter;
+        setDaemon(true);
+        setName("ProgressMeterDaemon");
+    }
+
+    /**
+     * Tells this daemon thread to shutdown at the next opportunity, as the progress
+     * metering is complete.
+     */
+    public final void done() {
+        this.done = true;
+    }
+
+    /**
+     * Start up the ProgressMeterDaemon, polling every ten seconds to print, if
+     * necessary, the provided progress meter.  Exits when done() is called or the
+     * thread is interrupted; as a daemon thread it never blocks JVM shutdown.
+     */
+    public void run() {
+        while ( ! done ) {
+            meter.printProgress(false);
+            try {
+                Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
+            } catch (InterruptedException e) {
+                // restore the interrupt status and stop polling rather than
+                // killing the daemon with an unchecked exception
+                Thread.currentThread().interrupt();
+                return;
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java
new file mode 100644
index 000000000..d4e4ab65e
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java
@@ -0,0 +1,70 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+
+import java.util.Arrays;
+
+/**
+ * Covariate capturing, for each offset in a read, the maximum number of times any
+ * short motif (1-8 bp) starting at that offset is tandemly repeated in the
+ * remainder of the read, capped at MAX_REPEAT_LENGTH.
+ *
+ * User: rpoplin
+ * Date: 11/3/12
+ */
+
+public class RepeatLengthCovariate implements ExperimentalCovariate {
+    // Longest repeat count we report; anything larger is clamped to this value
+    private static final int MAX_REPEAT_LENGTH = 20;
+
+    // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
+    public void initialize(final RecalibrationArgumentCollection RAC) {}
+
+    @Override
+    public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
+        final byte[] readBytes = read.getReadBases();
+        for (int i = 0; i < readBytes.length; i++) {
+            int maxRL = 0;
+            // try every repeat-unit length from 1 to 8 bp anchored at offset i
+            for (int str = 1; str <= 8; str++) {
+                if (i + str <= readBytes.length) {
+                    maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions(
+                            Arrays.copyOfRange(readBytes, i, i + str),
+                            Arrays.copyOfRange(readBytes, i, readBytes.length)
+                    ));
+                }
+            }
+            // clamp so the key space stays bounded by maximumKeyValue()
+            if (maxRL > MAX_REPEAT_LENGTH) { maxRL = MAX_REPEAT_LENGTH; }
+            values.addCovariate(maxRL, maxRL, maxRL, i);
+        }
+    }
+
+    // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    @Override
+    public final Object getValue(final String str) {
+        // parse as Integer (not Byte) so the value round-trips through keyFromValue(),
+        // which casts any non-String value to Integer
+        return Integer.parseInt(str);
+    }
+
+    @Override
+    public String formatKey(final int key) {
+        return String.format("%d", key);
+    }
+
+    @Override
+    public int keyFromValue(final Object value) {
+        return (value instanceof String) ? Integer.parseInt((String) value) : (Integer) value;
+    }
+
+    @Override
+    public int maximumKeyValue() {
+        return MAX_REPEAT_LENGTH + 1;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 9fdb48b34..6c7a162f8 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -397,6 +397,9 @@ public class GATKSAMRecord extends BAMRecord {
else if (op != CigarOperator.HARD_CLIP)
break;
}
+
+ if ( softStart < 1 )
+ softStart = 1;
}
return softStart;
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java b/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
new file mode 100644
index 000000000..cac51239a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
@@ -0,0 +1,80 @@
+package org.broadinstitute.sting.utils.sam;
+
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+/**
+ * Checks for and errors out (or fixes if requested) when it detects reads with base qualities that are not encoded with
+ * phred-scaled quality scores. Q0 == ASCII 33 according to the SAM specification, whereas Illumina encoding starts at
+ * Q64. The idea here is simple: if we are asked to fix the scores then we just subtract 31 from every quality score.
+ * Otherwise, we randomly sample reads (for efficiency) and error out if we encounter a qual that's too high.
+ */
+public class MisencodedBaseQualityReadTransformer extends ReadTransformer {
+
+    private static final int samplingFrequency = 1000; // sample 1 read for every 1000 encountered
+    private static final int encodingFixValue = 31; // Illumina_64 - PHRED_33
+
+    private boolean disabled;
+    private boolean fixQuals;
+    // NOTE(review): static counter is shared across instances and is not synchronized;
+    // under multi-threaded traversal the sampling interval is approximate only
+    private static int currentReadCounter = 0;
+
+    @Override
+    public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
+        fixQuals = engine.getArguments().FIX_MISENCODED_QUALS;
+        disabled = !fixQuals && engine.getArguments().ALLOW_POTENTIALLY_MISENCODED_QUALS;
+
+        return ReadTransformer.ApplicationTime.ON_INPUT;
+    }
+
+    @Override
+    public boolean enabled() {
+        return !disabled;
+    }
+
+    @Override
+    public GATKSAMRecord apply(final GATKSAMRecord read) {
+        if ( fixQuals )
+            return fixMisencodedQuals(read);
+
+        checkForMisencodedQuals(read);
+        return read;
+    }
+
+    /**
+     * Shifts every base quality down by encodingFixValue, converting Illumina-64
+     * encoded quals to phred-33.  Errors out if a qual underflows, since that means
+     * the read was already correctly encoded and the input mixes encodings.
+     */
+    protected static GATKSAMRecord fixMisencodedQuals(final GATKSAMRecord read) {
+        final byte[] quals = read.getBaseQualities();
+        for ( int i = 0; i < quals.length; i++ ) {
+            quals[i] -= encodingFixValue;
+            if ( quals[i] < 0 )
+                throw new UserException.BadInput("while fixing mis-encoded base qualities we encountered a read that was correctly encoded; we cannot handle such a mixture of reads so unfortunately the BAM must be fixed with some other tool");
+        }
+        read.setBaseQualities(quals);
+        return read;
+    }
+
+    /**
+     * Spot-checks roughly one read per samplingFrequency for quality scores that are
+     * impossibly high for phred-33, which indicates Illumina-64 encoding.
+     */
+    protected static void checkForMisencodedQuals(final GATKSAMRecord read) {
+        // sample reads randomly for checking
+        if ( ++currentReadCounter >= samplingFrequency ) {
+            currentReadCounter = 0;
+
+            final byte[] quals = read.getBaseQualities();
+            for ( final byte qual : quals ) {
+                if ( qual > QualityUtils.MAX_REASONABLE_Q_SCORE )
+                    throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score of " + (int)qual);
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 1f1867f75..b3e3cf8df 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -1267,7 +1267,7 @@ public class VariantContextUtils {
* @param testString String to test
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
*/
- protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
+ public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
int numRepeats = 0;
for (int start = 0; start < testString.length; start += repeatUnit.length) {
int end = start + repeatUnit.length;
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java
index 18d91ef3f..1f3cdd0fe 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java
@@ -71,7 +71,6 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
this.takeOwnershipOfInner = takeOwnershipOfInner;
// has to be PriorityBlockingQueue to be thread-safe
- // see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
this.queue = new PriorityBlockingQueue(50, new VariantContextComparator());
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index e4c7b2db0..a65b0cb45 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.traversals;
+import com.google.java.contract.PreconditionError;
import net.sf.samtools.*;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -23,6 +25,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -45,6 +48,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker {
private final double prob;
+ private EnumSet states = super.desiredReadStates();
+
protected List isActiveCalls = new ArrayList();
protected Map mappedActiveRegions = new HashMap();
@@ -52,6 +57,20 @@ public class TraverseActiveRegionsTest extends BaseTest {
this.prob = 1.0;
}
+ public DummyActiveRegionWalker(double constProb) {
+ this.prob = constProb;
+ }
+
+ public DummyActiveRegionWalker(EnumSet wantStates) {
+ this.prob = 1.0;
+ this.states = wantStates;
+ }
+
+ @Override
+ public EnumSet desiredReadStates() {
+ return states;
+ }
+
@Override
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
isActiveCalls.add(ref.getLocus());
@@ -82,7 +101,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
private GenomeLocParser genomeLocParser;
private List intervals;
- private List reads;
+ private List reads;
@BeforeClass
private void init() throws FileNotFoundException {
@@ -96,21 +115,21 @@ public class TraverseActiveRegionsTest extends BaseTest {
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
- // TODO: this fails!
- //intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
+ intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
- reads = new ArrayList();
+ reads = new ArrayList();
reads.add(buildSAMRecord("simple", "1", 100, 200));
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
- reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
- reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
+ reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
- // TODO
- //reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
+ reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
+
+ // required by LocusIteratorByState, and I prefer to list them in test case order above
+ ReadUtils.sortReadsByCoordinate(reads);
}
@Test
@@ -134,6 +153,18 @@ public class TraverseActiveRegionsTest extends BaseTest {
return activeIntervals;
}
+ @Test (expectedExceptions = PreconditionError.class)
+ public void testIsActiveRangeLow () {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(-0.1);
+ getActiveRegions(walker, intervals).values();
+ }
+
+ @Test (expectedExceptions = PreconditionError.class)
+ public void testIsActiveRangeHigh () {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(1.1);
+ getActiveRegions(walker, intervals).values();
+ }
+
@Test
public void testActiveRegionCoverage() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
@@ -187,80 +218,210 @@ public class TraverseActiveRegionsTest extends BaseTest {
}
@Test
- public void testReadMapping() {
+ public void testPrimaryReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
+ // simple: Primary in 1:1-999
+ // overlap_equal: Primary in 1:1-999
+ // overlap_unequal: Primary in 1:1-999
+ // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+ // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
+ // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
+ // outside_intervals: none
+ // simple20: Primary in 20:10000-10100
+
+ Map activeRegions = getActiveRegions(walker, intervals);
+ ActiveRegion region;
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
+
+ getRead(region, "simple");
+ getRead(region, "overlap_equal");
+ getRead(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ getRead(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ getRead(region, "simple20");
+
+ // TODO: more tests and edge cases
+ }
+
+ @Test
+ public void testNonPrimaryReadMapping() {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
+ EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
+
+ // Contract: Each read has the Primary state in a single region (or none)
+ // This is the region of maximum overlap for the read (earlier if tied)
+
+ // Contract: Each read has the Non-Primary state in all other regions it overlaps
+
+ // simple: Primary in 1:1-999
+ // overlap_equal: Primary in 1:1-999
+ // overlap_unequal: Primary in 1:1-999
+ // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+ // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
+ // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
+ // outside_intervals: none
+ // simple20: Primary in 20:10000-10100
+
+ Map activeRegions = getActiveRegions(walker, intervals);
+ ActiveRegion region;
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
+
+ getRead(region, "simple");
+ getRead(region, "overlap_equal");
+ getRead(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ getRead(region, "simple20");
+
+ // TODO: more tests and edge cases
+ }
+
+ @Test
+ public void testExtendedReadMapping() {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
+ EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
+
+ // Contract: Each read has the Primary state in a single region (or none)
+ // This is the region of maximum overlap for the read (earlier if tied)
+
// Contract: Each read has the Non-Primary state in all other regions it overlaps
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
- // boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
- // boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
- // extended_only: Extended in 1:2000-2999
+ // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+ // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
-
- // TODO
- // simple20: Primary in 20:10000-20000
+ // simple20: Primary in 20:10000-10100
Map activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
- verifyReadPrimary(region, "simple");
- verifyReadPrimary(region, "overlap_equal");
- verifyReadPrimary(region, "overlap_unequal");
+ getRead(region, "simple");
+ getRead(region, "overlap_equal");
+ getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
- verifyReadNotPlaced(region, "extended_only");
- // TODO: fail verifyReadNonPrimary(region, "extended_and_np");
+ getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
- // TODO: fail verifyReadPrimary(region, "boundary_equal");
- // TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
- verifyReadNotPlaced(region, "extended_only");
- // TODO: fail verifyReadPrimary(region, "extended_and_np");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
- // TODO: fail verifyReadNonPrimary(region, "boundary_equal");
- verifyReadPrimary(region, "boundary_unequal");
- // TODO: fail verifyReadExtended(region, "extended_only");
- // TODO: fail verifyReadExtended(region, "extended_and_np");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ getRead(region, "simple20");
// TODO: more tests and edge cases
}
- private void verifyReadPrimary(ActiveRegion region, String readName) {
- SAMRecord read = getRead(region, readName);
- Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
- }
-
- private void verifyReadNonPrimary(ActiveRegion region, String readName) {
- SAMRecord read = getRead(region, readName);
- Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
- }
-
- private void verifyReadExtended(ActiveRegion region, String readName) {
- Assert.fail("The Extended read state has not been implemented");
- }
-
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
for (SAMRecord read : region.getReads()) {
if (read.getReadName().equals(readName))
@@ -274,7 +435,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
return read;
}
- Assert.fail("Read " + readName + " not found in active region " + region);
+ Assert.fail("Read " + readName + " not assigned to active region " + region);
return null;
}
@@ -282,6 +443,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
t.traverse(walker, dataProvider, 0);
+ t.endTraversal(walker, 0);
+
return walker.mappedActiveRegions;
}
@@ -345,7 +508,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine);
- StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads);
+ StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList