Input
*
diff --git a/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java b/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
index 480113e1e..830c6590d 100644
--- a/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
+++ b/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
@@ -210,13 +210,23 @@ public class JnaSession implements Session {
}
public static void setAttribute(Pointer jt, String name, String value) throws DrmaaException {
- checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+ if (getAttrNames().contains(name)) {
+ checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+ }
+ else {
+ throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
+ }
}
public static String getAttribute(Pointer jt, String name) throws DrmaaException {
- Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
- checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
- return attrBuffer.getString(0);
+ if (getAttrNames().contains(name)) {
+ Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
+ checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+ return attrBuffer.getString(0);
+ }
+ else {
+ throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
+ }
}
public static void setVectorAttribute(Pointer jt, String name, Collection values) throws DrmaaException {
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
index a3ffe708c..bf60b4a80 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java
@@ -374,7 +374,7 @@ public final class GenomeLocParser {
int start = 1;
int stop = -1;
- final int colonIndex = str.indexOf(":");
+ final int colonIndex = str.lastIndexOf(":");
if(colonIndex == -1) {
contig = str.substring(0, str.length()); // chr1
stop = Integer.MAX_VALUE;
diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
index 1242e5b00..848beccb8 100755
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@@ -9,7 +9,7 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
- public final static byte MAX_RECALIBRATED_Q_SCORE = 93;
+ public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java
new file mode 100644
index 000000000..00e491eb0
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java
@@ -0,0 +1,16 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: thibault
+ * Date: 11/26/12
+ * Time: 2:35 PM
+ *
+ * Describes how a read relates to an assigned ActiveRegion
+ */
+public enum ActiveRegionReadState {
+ PRIMARY, // This is the read's primary region
+ NONPRIMARY, // This region overlaps the read, but it is not primary
+ EXTENDED, // This region would overlap the read if it were extended
+ UNMAPPED // This read is not mapped
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
index cf4d699ee..9ad1bf773 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@@ -406,10 +406,15 @@ public class BAQ {
// so BQi = Qi - BAQi + 64
byte[] bqTag = new byte[baq.length];
for ( int i = 0; i < bqTag.length; i++) {
- int bq = (int)read.getBaseQualities()[i] + 64;
- int baq_i = (int)baq[i];
- int tag = bq - baq_i;
- if ( tag < 0 ) throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
+ final int bq = (int)read.getBaseQualities()[i] + 64;
+ final int baq_i = (int)baq[i];
+ final int tag = bq - baq_i;
+ // problem with the calculation of the correction factor; this is our problem
+ if ( tag < 0 )
+ throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
+ // the original quality is too high, almost certainly due to using the wrong encoding in the BAM file
+ if ( tag > Byte.MAX_VALUE )
+ throw new UserException.MalformedBAM(read, "we encountered an extremely high quality score (" + (bq - 64) + ") with BAQ correction factor of " + baq_i + "; the BAM file appears to be using the wrong encoding for quality scores");
bqTag[i] = (byte)tag;
}
return new String(bqTag);
diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
index a49a12292..a2ec35ae2 100755
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -267,7 +268,7 @@ public class UserException extends ReviewedStingException {
public static class ReadMissingReadGroup extends MalformedBAM {
public ReadMissingReadGroup(SAMRecord read) {
- super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
+ super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpUtils.GATK_FORUM_URL + "discussion/59/companion-utilities-replacereadgroups to fix this problem", read.getReadName()));
}
}
@@ -343,7 +344,7 @@ public class UserException extends ReviewedStingException {
super(String.format("Lexicographically sorted human genome sequence detected in %s."
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
- + "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
+ + "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.GATK_FORUM_URL + "discussion/58/companion-utilities-reordersam"
+ "\n %s contigs = %s",
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
index fe5f48a48..64238dc73 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/ForumAPIUtils.java
@@ -44,14 +44,13 @@ public class ForumAPIUtils {
/**
* How we post to the forum
*/
- private final static String API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
final private static String ACCESS_TOKEN = "access_token=";
public static List getPostedTools(String forumKey) {
Gson gson = new Gson();
List output = new ArrayList();
- String text = httpGet(API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
+ String text = httpGet(HelpUtils.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
APIQuery details = gson.fromJson(text, APIQuery.class);
ForumDiscussion[] discussions = details.Discussions;
@@ -159,7 +158,7 @@ public class ForumAPIUtils {
Gson gson = new Gson();
String data = gson.toJson(post.getPostData());
- httpPost(data, API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
+ httpPost(data, HelpUtils.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
}
@@ -167,8 +166,7 @@ public class ForumAPIUtils {
class APIQuery {
ForumDiscussion[] Discussions;
- public APIQuery() {
- }
+ public APIQuery() {}
}
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
index 4ec2ac6d7..21054a794 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
@@ -28,7 +28,7 @@ public class GATKDocUtils {
/**
* The URL root for RELEASED GATKDOC units
*/
- public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gatk/gatkdocs/";
+ public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpUtils.GATK_DOCS_URL;
/**
* The URL root for STABLE GATKDOC units
*/
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
index 645ab34c1..1bc20d5a0 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
@@ -32,6 +32,15 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
import java.lang.reflect.Field;
public class HelpUtils {
+
+ public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
+ public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
+ public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
+ public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
+
+
+
+
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
try {
Class type = getClassForDoc(classDoc);
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index 42938d2a6..25f0bfa6d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -652,23 +652,19 @@ public abstract class AbstractReadBackedPileup tracker = (PerSamplePileupElementTracker) pileupElementTracker;
PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
- int current = 0;
for (final String sample : tracker.getSamples()) {
PileupElementTracker perSampleElements = tracker.getElements(sample);
- List filteredPileup = new ArrayList();
- for (PileupElement p : perSampleElements) {
+ int current = 0;
+ UnifiedPileupElementTracker filteredPileup = new UnifiedPileupElementTracker();
+ for (PE p : perSampleElements) {
if (positions.contains(current))
filteredPileup.add(p);
- }
+ current++;
- if (!filteredPileup.isEmpty()) {
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements);
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
}
-
- current++;
+ filteredTracker.addElements(sample, filteredPileup);
}
return (RBP) createNewPileup(loc, filteredTracker);
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java
new file mode 100644
index 000000000..d4e4ab65e
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java
@@ -0,0 +1,64 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.TandemRepeat;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+
+import java.util.Arrays;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 11/3/12
+ */
+
+public class RepeatLengthCovariate implements ExperimentalCovariate {
+ final int MAX_REPEAT_LENGTH = 20;
+
+ // Initialize any member variables using the command-line arguments passed to the walkers
+ @Override
+ public void initialize(final RecalibrationArgumentCollection RAC) {}
+
+ @Override
+ public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
+ byte[] readBytes = read.getReadBases();
+ for (int i = 0; i < readBytes.length; i++) {
+ int maxRL = 0;
+ for (int str = 1; str <= 8; str++) {
+ if (i + str <= readBytes.length) {
+ maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions(
+ Arrays.copyOfRange(readBytes,i,i + str),
+ Arrays.copyOfRange(readBytes,i,readBytes.length)
+ ));
+ }
+ }
+ if(maxRL > MAX_REPEAT_LENGTH) { maxRL = MAX_REPEAT_LENGTH; }
+ values.addCovariate(maxRL, maxRL, maxRL, i);
+ }
+ }
+
+ // Used to get the covariate's value from input csv file during on-the-fly recalibration
+ @Override
+ public final Object getValue(final String str) {
+ return Byte.parseByte(str);
+ }
+
+ @Override
+ public String formatKey(final int key) {
+ return String.format("%d", key);
+ }
+
+ @Override
+ public int keyFromValue(final Object value) {
+ return (value instanceof String) ? Integer.parseInt((String) value) : (Integer) value;
+ }
+
+ @Override
+ public int maximumKeyValue() {
+ return MAX_REPEAT_LENGTH + 1;
+ }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 1f1867f75..b3e3cf8df 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -1267,7 +1267,7 @@ public class VariantContextUtils {
* @param testString String to test
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
*/
- protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
+ public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
int numRepeats = 0;
for (int start = 0; start < testString.length; start += repeatUnit.length) {
int end = start + repeatUnit.length;
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java
index 18d91ef3f..1f3cdd0fe 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java
@@ -71,7 +71,6 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
this.takeOwnershipOfInner = takeOwnershipOfInner;
// has to be PriorityBlockingQueue to be thread-safe
- // see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
this.queue = new PriorityBlockingQueue(50, new VariantContextComparator());
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index e4c7b2db0..a65b0cb45 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.traversals;
+import com.google.java.contract.PreconditionError;
import net.sf.samtools.*;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -23,6 +25,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -45,6 +48,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker {
private final double prob;
+ private EnumSet states = super.desiredReadStates();
+
protected List isActiveCalls = new ArrayList();
protected Map mappedActiveRegions = new HashMap();
@@ -52,6 +57,20 @@ public class TraverseActiveRegionsTest extends BaseTest {
this.prob = 1.0;
}
+ public DummyActiveRegionWalker(double constProb) {
+ this.prob = constProb;
+ }
+
+ public DummyActiveRegionWalker(EnumSet wantStates) {
+ this.prob = 1.0;
+ this.states = wantStates;
+ }
+
+ @Override
+ public EnumSet desiredReadStates() {
+ return states;
+ }
+
@Override
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
isActiveCalls.add(ref.getLocus());
@@ -82,7 +101,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
private GenomeLocParser genomeLocParser;
private List intervals;
- private List reads;
+ private List reads;
@BeforeClass
private void init() throws FileNotFoundException {
@@ -96,21 +115,21 @@ public class TraverseActiveRegionsTest extends BaseTest {
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
- // TODO: this fails!
- //intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
+ intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
- reads = new ArrayList();
+ reads = new ArrayList();
reads.add(buildSAMRecord("simple", "1", 100, 200));
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
- reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
- reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
+ reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
- // TODO
- //reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
+ reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
+
+ // required by LocusIteratorByState, and I prefer to list them in test case order above
+ ReadUtils.sortReadsByCoordinate(reads);
}
@Test
@@ -134,6 +153,18 @@ public class TraverseActiveRegionsTest extends BaseTest {
return activeIntervals;
}
+ @Test (expectedExceptions = PreconditionError.class)
+ public void testIsActiveRangeLow () {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(-0.1);
+ getActiveRegions(walker, intervals).values();
+ }
+
+ @Test (expectedExceptions = PreconditionError.class)
+ public void testIsActiveRangeHigh () {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(1.1);
+ getActiveRegions(walker, intervals).values();
+ }
+
@Test
public void testActiveRegionCoverage() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
@@ -187,80 +218,210 @@ public class TraverseActiveRegionsTest extends BaseTest {
}
@Test
- public void testReadMapping() {
+ public void testPrimaryReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
// Contract: Each read has the Primary state in a single region (or none)
// This is the region of maximum overlap for the read (earlier if tied)
+ // simple: Primary in 1:1-999
+ // overlap_equal: Primary in 1:1-999
+ // overlap_unequal: Primary in 1:1-999
+ // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+ // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
+ // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
+ // outside_intervals: none
+ // simple20: Primary in 20:10000-10100
+
+ Map activeRegions = getActiveRegions(walker, intervals);
+ ActiveRegion region;
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
+
+ getRead(region, "simple");
+ getRead(region, "overlap_equal");
+ getRead(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ getRead(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ getRead(region, "simple20");
+
+ // TODO: more tests and edge cases
+ }
+
+ @Test
+ public void testNonPrimaryReadMapping() {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
+ EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
+
+ // Contract: Each read has the Primary state in a single region (or none)
+ // This is the region of maximum overlap for the read (earlier if tied)
+
+ // Contract: Each read has the Non-Primary state in all other regions it overlaps
+
+ // simple: Primary in 1:1-999
+ // overlap_equal: Primary in 1:1-999
+ // overlap_unequal: Primary in 1:1-999
+ // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+ // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
+ // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
+ // outside_intervals: none
+ // simple20: Primary in 20:10000-10100
+
+ Map activeRegions = getActiveRegions(walker, intervals);
+ ActiveRegion region;
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
+
+ getRead(region, "simple");
+ getRead(region, "overlap_equal");
+ getRead(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ getRead(region, "simple20");
+
+ // TODO: more tests and edge cases
+ }
+
+ @Test
+ public void testExtendedReadMapping() {
+ DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
+ EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
+
+ // Contract: Each read has the Primary state in a single region (or none)
+ // This is the region of maximum overlap for the read (earlier if tied)
+
// Contract: Each read has the Non-Primary state in all other regions it overlaps
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
// simple: Primary in 1:1-999
// overlap_equal: Primary in 1:1-999
// overlap_unequal: Primary in 1:1-999
- // boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
- // boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
- // extended_only: Extended in 1:2000-2999
+ // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+ // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
-
- // TODO
- // simple20: Primary in 20:10000-20000
+ // simple20: Primary in 20:10000-10100
Map activeRegions = getActiveRegions(walker, intervals);
ActiveRegion region;
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
- verifyReadPrimary(region, "simple");
- verifyReadPrimary(region, "overlap_equal");
- verifyReadPrimary(region, "overlap_unequal");
+ getRead(region, "simple");
+ getRead(region, "overlap_equal");
+ getRead(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
- verifyReadNotPlaced(region, "extended_only");
- // TODO: fail verifyReadNonPrimary(region, "extended_and_np");
+ getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
- // TODO: fail verifyReadPrimary(region, "boundary_equal");
- // TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
- verifyReadNotPlaced(region, "extended_only");
- // TODO: fail verifyReadPrimary(region, "extended_and_np");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
- // TODO: fail verifyReadNonPrimary(region, "boundary_equal");
- verifyReadPrimary(region, "boundary_unequal");
- // TODO: fail verifyReadExtended(region, "extended_only");
- // TODO: fail verifyReadExtended(region, "extended_and_np");
+ getRead(region, "boundary_equal");
+ getRead(region, "boundary_unequal");
+ getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
+ verifyReadNotPlaced(region, "simple20");
+
+ region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+ verifyReadNotPlaced(region, "simple");
+ verifyReadNotPlaced(region, "overlap_equal");
+ verifyReadNotPlaced(region, "overlap_unequal");
+ verifyReadNotPlaced(region, "boundary_equal");
+ verifyReadNotPlaced(region, "boundary_unequal");
+ verifyReadNotPlaced(region, "extended_and_np");
+ verifyReadNotPlaced(region, "outside_intervals");
+ getRead(region, "simple20");
// TODO: more tests and edge cases
}
- private void verifyReadPrimary(ActiveRegion region, String readName) {
- SAMRecord read = getRead(region, readName);
- Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
- }
-
- private void verifyReadNonPrimary(ActiveRegion region, String readName) {
- SAMRecord read = getRead(region, readName);
- Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
- }
-
- private void verifyReadExtended(ActiveRegion region, String readName) {
- Assert.fail("The Extended read state has not been implemented");
- }
-
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
for (SAMRecord read : region.getReads()) {
if (read.getReadName().equals(readName))
@@ -274,7 +435,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
return read;
}
- Assert.fail("Read " + readName + " not found in active region " + region);
+ Assert.fail("Read " + readName + " not assigned to active region " + region);
return null;
}
@@ -282,6 +443,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
t.traverse(walker, dataProvider, 0);
+ t.endTraversal(walker, 0);
+
return walker.mappedActiveRegions;
}
@@ -345,7 +508,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine);
- StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads);
+ StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList(reads));
Shard shard = new MockLocusShard(genomeLocParser, intervals);
List providers = new ArrayList();
diff --git a/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
index e9f138a0e..e4313b30a 100644
--- a/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java
@@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMSequenceDictionary;
+import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -74,6 +76,23 @@ public class GenomeLocParserUnitTest extends BaseTest {
genomeLocParser.parseGenomeLoc("Bad:0-1");
}
+ @Test
+ public void testContigHasColon() {
+ SAMFileHeader header = new SAMFileHeader();
+ header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
+ SAMSequenceDictionary dict = new SAMSequenceDictionary();
+ SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
+ rec.setSequenceLength(10);
+ dict.addSequence(rec);
+ header.setSequenceDictionary(dict);
+
+ final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
+ GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");
+ assertEquals(0, loc.getContigIndex());
+ assertEquals(loc.getStart(), 4);
+ assertEquals(loc.getStop(), 5);
+ }
+
@Test
public void testParseGoodString() {
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
@@ -81,7 +100,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
assertEquals(loc.getStop(), 10);
assertEquals(loc.getStart(), 1);
}
-
+
@Test
public void testCreateGenomeLoc1() {
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
index b2a4ac2da..b9ce58992 100644
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java
@@ -28,7 +28,6 @@ public class VCFIntegrationTest extends WalkerTest {
}
@Test(enabled = true)
- // See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
public void testReadingAndWritingBreakpointAlleles() {
String testVCF = privateTestDir + "breakpoint-example.vcf";
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
index f3daa9e4c..3ad438b26 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
@@ -793,7 +793,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
int sampleI = 0;
for ( final List alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
- genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
+ genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
}
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml
index 2de0273f3..d0b4a52b5 100644
--- a/public/packages/GATKEngine.xml
+++ b/public/packages/GATKEngine.xml
@@ -36,6 +36,9 @@
+
+
+