diff --git a/build.xml b/build.xml
index 834aef3cd..47e4eeb47 100644
--- a/build.xml
+++ b/build.xml
@@ -107,6 +107,12 @@
+
+
+
+
+
+
@@ -267,19 +273,19 @@
-
-
+
+
+
+
+
+
-
-
+
+
+
+
+
+
@@ -596,6 +602,7 @@
+
-
-
-
-
-
-
+
+
@@ -659,7 +662,7 @@
-
+
@@ -667,14 +670,10 @@
-
-
+
+
-
-
-
-
-
+
@@ -685,7 +684,7 @@
-
+
@@ -703,12 +702,12 @@
-
-
+
+
-
-
+
+
@@ -719,7 +718,7 @@
-
+
@@ -755,7 +754,7 @@
-
+
@@ -1098,7 +1097,6 @@
-
@@ -1129,6 +1127,7 @@
+
@@ -1136,6 +1135,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1215,6 +1229,7 @@
+
@@ -1228,10 +1243,11 @@
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
+
-
+
@@ -1270,6 +1286,7 @@
+
@@ -1382,6 +1399,7 @@
+
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
index 94f6ff649..53f206bfe 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
@@ -30,7 +30,8 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.variant.utils.BaseUtils;
+import org.broadinstitute.variant.variantcontext.Allele;
import java.io.PrintStream;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
index d0bcd0eb3..255f1fd05 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java
@@ -25,51 +25,46 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* OTHER DEALINGS IN THE SOFTWARE.
*/
-import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
-import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
+import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.sting.utils.threading.ThreadLocalArray;
+
+import java.util.LinkedList;
+import java.util.List;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
+ private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class);
- // optimization: only allocate temp arrays once per thread
- private final ThreadLocal threadLocalTempQualArray = new ThreadLocalArray(EventType.values().length, byte.class);
- private final ThreadLocal threadLocalTempFractionalErrorArray = new ThreadLocalArray(EventType.values().length, double.class);
-
- public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
- super.initialize(covariates, recalibrationTables);
- }
+ final List> allThreadLocalQualityScoreTables = new LinkedList>();
+ private ThreadLocal> threadLocalQualityScoreTables = new ThreadLocal>() {
+ @Override
+ protected synchronized NestedIntegerArray initialValue() {
+ final NestedIntegerArray table = recalibrationTables.makeQualityScoreTable();
+ allThreadLocalQualityScoreTables.add(table);
+ return table;
+ }
+ };
@Override
- public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
+ public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
+ final GATKSAMRecord read = recalInfo.getRead();
+ final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
+ final NestedIntegerArray qualityScoreTable = getThreadLocalQualityScoreTable();
+
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
- if( !skip[offset] ) {
- final ReadCovariates readCovariates = covariateKeySetFrom(read);
-
- byte[] tempQualArray = threadLocalTempQualArray.get();
- double[] tempFractionalErrorArray = threadLocalTempFractionalErrorArray.get();
-
- tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset];
- tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset];
- tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset];
- tempFractionalErrorArray[EventType.BASE_INSERTION.index] = insertionErrors[offset];
- tempQualArray[EventType.BASE_DELETION.index] = read.getBaseDeletionQualities()[offset];
- tempFractionalErrorArray[EventType.BASE_DELETION.index] = deletionErrors[offset];
+ if( ! recalInfo.skip(offset) ) {
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.index;
- final byte qual = tempQualArray[eventIndex];
- final double isError = tempFractionalErrorArray[eventIndex];
+ final byte qual = recalInfo.getQual(eventType, offset);
+ final double isError = recalInfo.getErrorFraction(eventType, offset);
- combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
-
- incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
+ incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
@@ -81,4 +76,24 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
}
}
}
+
+ /**
+ * Get a NestedIntegerArray for a QualityScore table specific to this thread
+ * @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate
+ */
+ private NestedIntegerArray getThreadLocalQualityScoreTable() {
+ return threadLocalQualityScoreTables.get();
+ }
+
+ @Override
+ public void finalizeData() {
+ // merge in all of the thread local tables
+ logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables");
+ for ( final NestedIntegerArray localTable : allThreadLocalQualityScoreTables ) {
+ recalibrationTables.combineQualityScoreTable(localTable);
+ }
+ allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves
+
+ super.finalizeData();
+ }
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java
index 3097c2ee9..bebc27221 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java
@@ -39,16 +39,27 @@ public class HeaderElement {
*
* @param location the reference location for the new element
*/
- public HeaderElement(int location) {
+ public HeaderElement(final int location) {
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location, new LinkedList());
}
+ /**
+ * Creates a new HeaderElement with empty consensusBaseCounts, empty filteredBaseCounts, an empty mappingQuality list, and the given number of insertions to the right
+ *
+ * @param location the reference location for the new element
+ * @param insertionsToTheRight number of insertions to the right of this HeaderElement
+ */
+ public HeaderElement(final int location, final int insertionsToTheRight) {
+ this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location, new LinkedList());
+ }
+
/**
* Creates a new HeaderElement with all given parameters
*
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
+ * @param nSoftClippedBases number of softclipped bases of this HeaderElement
* @param location the reference location of this reference element
* @param mappingQuality the list of mapping quality values of all reads that contributed to this
* HeaderElement
@@ -151,6 +162,14 @@ public class HeaderElement {
throw new ReviewedStingException("Removed too many insertions, header is now negative!");
}
+ public boolean hasInsertionToTheRight() {
+ return insertionsToTheRight > 0;
+ }
+
+ public int numInsertionsToTheRight() {
+ return insertionsToTheRight;
+ }
+
/**
* Whether or not the HeaderElement is variant due to excess insertions
*
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 2061c5364..39a284d98 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -300,7 +300,7 @@ public class ReduceReads extends ReadWalker, ReduceRea
// Check if the read goes beyond the boundaries of the chromosome, and hard clip those boundaries.
int chromosomeLength = ref.getGenomeLocParser().getContigInfo(read.getReferenceName()).getSequenceLength();
if (read.getSoftStart() < 0)
- read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart() - 1);
+ read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart());
if (read.getSoftEnd() > chromosomeLength)
read = ReadClipper.hardClipByReadCoordinates(read, chromosomeLength - read.getSoftStart() + 1, read.getReadLength() - 1);
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
index fff1c20a5..9af54b4a8 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
@@ -645,8 +645,15 @@ public class SlidingWindow {
}
}
- for (int i = 0; i <= lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
- windowHeader.remove();
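+ // clean up the window header elements up until the end of the variant region.
+ // note that if the last element of the region has insertions to the right, we replace it with a fresh element that keeps only its location and insertion count, in the event that the following element has a read that starts with an insertion.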
+ if ( lastStop >= 0 ) {
+ for (int i = 0; i < lastStop; i++)
+ windowHeader.remove();
+ final HeaderElement lastOfRegion = windowHeader.remove();
+ if ( lastOfRegion.hasInsertionToTheRight() )
+ windowHeader.addFirst(new HeaderElement(lastOfRegion.getLocation(), lastOfRegion.numInsertionsToTheRight()));
+ }
}
return allReads;
}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
index fc6d23382..30650e9c0 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java
@@ -8,8 +8,8 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
index 303ab94d6..502853868 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java
@@ -29,13 +29,13 @@ import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
index f6ad445c7..ce66ce8d0 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java
@@ -31,11 +31,11 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.*;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
index 4bcaa5ff9..3b12fe475 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java
@@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.Allele;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
index eb4cf1839..76a934091 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java
@@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.*;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
index 0f0f85441..461329ad0 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java
@@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.Allele;
import java.util.ArrayList;
import java.util.Arrays;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
index 9f2fdc096..3250de2b2 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java
@@ -31,7 +31,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.utils.BaseUtils;
+import org.broadinstitute.variant.variantcontext.*;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
index 0a3512aa6..d6b3eb768 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
@@ -10,10 +10,10 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.Genotype;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.Genotype;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import java.io.*;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java
index 6f3740ab3..f4f17f1e0 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java
@@ -4,7 +4,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.*;
import java.util.ArrayList;
import java.util.Arrays;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java
index b248c8759..5f5d80fde 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java
@@ -27,9 +27,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
+import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.*;
import java.util.*;
@@ -532,7 +533,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
}
// if there is no mass on the (new) likelihoods, then just no-call the sample
- if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) {
+ if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
}
else {
@@ -544,7 +545,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
- if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL )
+ if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(NO_CALL_ALLELES);
else
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
index 4d81d0010..5aef002fe 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@@ -36,7 +36,9 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
+import org.broadinstitute.variant.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
@@ -44,13 +46,15 @@ import java.util.*;
public class GenotypingEngine {
private final boolean DEBUG;
+ private final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS;
private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied
private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("", false);
private final VariantAnnotatorEngine annotationEngine;
- public GenotypingEngine( final boolean DEBUG, final VariantAnnotatorEngine annotationEngine ) {
+ public GenotypingEngine( final boolean DEBUG, final VariantAnnotatorEngine annotationEngine, final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ) {
this.DEBUG = DEBUG;
this.annotationEngine = annotationEngine;
+ this.USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = USE_FILTERED_READ_MAP_FOR_ANNOTATIONS;
noCall.add(Allele.NO_CALL);
}
@@ -156,7 +160,7 @@ public class GenotypingEngine {
}
// Merge the event to find a common reference representation
- final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
+ final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; }
// let's update the Allele keys in the mapper because they can change after merging when there are complex events
@@ -192,11 +196,13 @@ public class GenotypingEngine {
}
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
if( call != null ) {
- final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap, perSampleFilteredReadList, call );
+ final Map alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
+ convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0, UG_engine.getUAC().contaminationLog ) );
+ final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
- annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall);
+ annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
}
returnCalls.add( annotatedCall );
@@ -339,12 +345,7 @@ public class GenotypingEngine {
}
// count up the co-occurrences of the events for the R^2 calculation
for( final String sample : samples ) {
- final HashSet sampleSet = new HashSet(1);
- sampleSet.add(sample);
-
- final List alleleList = new ArrayList();
- alleleList.add(Allele.create(h.getBases()));
- final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet, haplotypeReadMap, alleleList )[0][0];
+ final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h.getBases())) )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 35aa86ca2..8c8113f46 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -48,8 +48,8 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
-import org.broadinstitute.sting.utils.codecs.vcf.*;
-import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
+import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
@@ -61,8 +61,8 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
-import org.broadinstitute.sting.utils.variantcontext.*;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.variant.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import java.io.FileNotFoundException;
import java.io.PrintStream;
@@ -130,18 +130,26 @@ public class HaplotypeCaller extends ActiveRegionWalker implem
protected String keepRG = null;
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
- protected int MIN_PRUNE_FACTOR = 1;
+ protected int MIN_PRUNE_FACTOR = 2;
@Advanced
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
protected int gcpHMM = 10;
+ @Advanced
+ @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
+ protected int minKmer = 11;
+
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
protected boolean USE_ALLELES_TRIGGER = false;
+ @Advanced
+ @Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "If specified, use the contamination-filtered read maps for the purposes of annotating variants", required=false)
+ protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false;
+
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP is not used in any way for the calculations themselves.
@@ -234,7 +242,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
- UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
+ UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
@@ -244,7 +252,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.exactCallsLog = null;
- UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
+ UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@@ -278,9 +286,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
}
- assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
+ assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter, minKmer );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
- genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine );
+ genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
}
//---------------------------------------------------------------------------------------------------------------
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java
index 8a401439b..0e4673497 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java
@@ -39,18 +39,18 @@ import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.SWPairwiseAlignment;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.variant.vcf.VCFHeader;
+import org.broadinstitute.variant.vcf.VCFHeaderLine;
+import org.broadinstitute.variant.vcf.VCFHeaderLineType;
+import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
+import org.broadinstitute.variant.variantcontext.VariantContextUtils;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory;
import java.util.*;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index 018102893..59f241cdb 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -29,16 +29,13 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pairhmm.*;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.Allele;
-import java.io.PrintStream;
import java.util.*;
public class LikelihoodCalculationEngine {
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
index bf6c82d82..6d5d268a6 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
@@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.ArrayList;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
index 3c5a1f79c..0a98f54e9 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
@@ -11,8 +11,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.VariantContext;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.PrintStream;
@@ -28,7 +28,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
- private static final byte MIN_QUALITY = (byte) 17;
+ private static final byte MIN_QUALITY = (byte) 16;
// Smith-Waterman parameters originally copied from IndelRealigner
private static final double SW_MATCH = 5.0; // 1.0;
@@ -39,13 +39,15 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
private final boolean DEBUG;
private final PrintStream GRAPH_WRITER;
private final ArrayList> graphs = new ArrayList>();
+ private final int MIN_KMER;
- private int PRUNE_FACTOR = 1;
+ private int PRUNE_FACTOR = 2;
- public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter ) {
+ public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter, final int minKmer ) {
super();
DEBUG = debug;
GRAPH_WRITER = graphWriter;
+ MIN_KMER = minKmer;
}
public ArrayList runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList activeAllelesToGenotype ) {
@@ -72,8 +74,9 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
protected void createDeBruijnGraphs( final List reads, final Haplotype refHaplotype ) {
graphs.clear();
+ final int maxKmer = refHaplotype.getBases().length;
// create the graph
- for( int kmer = 31; kmer <= 75; kmer += 6 ) {
+ for( int kmer = MIN_KMER; kmer <= maxKmer; kmer += 6 ) {
final DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class);
if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) {
graphs.add(graph);
diff --git a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java
index 77a7c3bd9..4a13fb615 100644
--- a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java
+++ b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java
@@ -29,7 +29,7 @@ import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.Allele;
import java.io.PrintStream;
import java.util.*;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
index b15969fba..177a989fb 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
@@ -38,7 +38,8 @@ public class BQSRIntegrationTest extends WalkerTest {
args +
" -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
" --allow_potentially_misencoded_quality_scores" + // TODO -- remove me when we get new SOLiD bams
- " -o %s";
+ " -o %s" +
+ " -sortAllCols";
}
@Override
@@ -52,21 +53,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
- {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "4fd3c9ad97e6ac58cba644a76564c9f7")},
- {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "2620f734cce20f70ce13afd880e46e5c")},
- {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "5eb3b94e767da19a4c037ee132e4b19a")},
- {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab261d291b107a3da7897759c0e4fa89")},
- {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "292303f649fbb19dc05d4a0197a49eeb")},
- {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "8ced9d1094493f17fb1876b818a64541")},
- {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "abb838131e403d39820dbd66932d1ed0")},
- {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "f70d8b5358bc2f76696f14b7a807ede0")},
- {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4c0f63e06830681560a1e9f9aad9fe98")},
- {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "8f62aa0e75770204c98d8299793cc53c")},
- {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "03c29a0c1d21f72b12daf51cec111599")},
- {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "7080b2cad02ec6e67ebc766b2dccebf8")},
- {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "30e76055c16843b6e33e5b9bd8ced57c")},
- {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "f70d8b5358bc2f76696f14b7a807ede0")},
- {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5e657fd6a44dcdc7674b6e5a2de5dc83")},
+ {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "2f250fecb930e0dfe0f63fe0fed3960b")},
+ {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "26c8d7226139a040557b1d3b1c8792f0")},
+ {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "9b43a1839cb6ea03aec1d96f15ca8efb")},
+ {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "3159a9d136c45e4a65d46a23dc8fd3b5")},
+ {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "bb7262829effbbdbc8d88dd36f480368")},
+ {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "fbb002fa2b9197c4b555852dccc11562")},
+ {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "7392acb71131a60a527ca32715fc59be")},
+ {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "49d4383896a90795d94138db1410a7df")},
+ {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "427448eff98cf194cc7217c0b1401e79")},
+ {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "50cd1a10b6ecb3d09f90f1e4a66da95d")},
+ {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "1dc71561c9d0fb56f9876cb5043c5376")},
+ {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "13e8f032e76340b114847c90af0a1f8a")},
+ {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "03f58ae4f9d203034e895a3636fc108f")},
+ {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "49d4383896a90795d94138db1410a7df")},
+ {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2db2ef8c2d63e167663d70340182f49a")},
};
}
@@ -100,6 +101,7 @@ public class BQSRIntegrationTest extends WalkerTest {
" -knownSites " + b36dbSNP129 +
" -L 1:10,000,000-10,200,000" +
" -o /dev/null" +
+ " -sortAllCols" +
" --plot_pdf_file /dev/null" +
" --intermediate_csv_file %s",
Arrays.asList("d1c38a3418979400630e2bca1140689c"));
@@ -114,7 +116,8 @@ public class BQSRIntegrationTest extends WalkerTest {
" -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
" -L 1:50,000-80,000" +
" --allow_potentially_misencoded_quality_scores" + // TODO -- remove me when we get new SOLiD bams
- " -o %s",
+ " -o %s" +
+ " -sortAllCols",
1, // just one output file
UserException.class);
executeTest("testBQSRFailWithSolidNoCall", spec);
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index 1e539dc9d..7e662d3b2 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -17,6 +17,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
final String COREDUCTION_BAM_A = validationDataLocation + "coreduction.test.A.bam";
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
+ final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
+ final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam";
private void RRTest(String testName, String args, String md5) {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s ";
@@ -29,6 +31,12 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
RRTest("testDefaultCompression ", L, "98080d3c53f441564796fc143cf510da");
}
+ @Test(enabled = true)
+ public void testInsertionsAtEdgeOfConsensus() {
+ String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
+ executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("2a6e08a0206bd8ec7671224c4a55dae0")));
+ }
+
@Test(enabled = true)
public void testMultipleIntervals() {
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
@@ -86,5 +94,15 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("5c30fde961a1357bf72c15144c01981b")));
}
+ /**
+ * Regression test for a bug that occurs when reads are soft-clipped off the end of a contig (usually the MT). It guards against upstream
+ * changes breaking the current hard-clipping routine that protects ReduceReads from such reads.
+ */
+ @Test(enabled = true)
+ public void testReadOffContig() {
+ String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
+ executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("2f17c1a78e9d0138217fdb83cede8f68")));
+ }
+
}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
index f95ba66b2..4d4dbbdb5 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java
@@ -28,11 +28,11 @@ import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.Walker;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert;
import org.testng.annotations.Test;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
index f26194e00..cdd31a5ef 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
@@ -70,12 +70,12 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
- PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55");
+ PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","6987b89e04dcb604d3743bb09aa9587d");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
- PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118");
+ PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d0780f70365ed1b431099fd3b4cec449");
}
@Test(enabled = true)
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index c768f95ad..a8ba92634 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
- Arrays.asList("97df6c2a8d390d43b9bdf56c979d9b09"));
+ Arrays.asList("b41b95aaa2c453c9b75b3b29a9c2718e"));
executeTest("test Multiple SNP alleles", spec);
}
@@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
- private final static String COMPRESSED_OUTPUT_MD5 = "3eba6c309514d1e9ee06a20a112b68e6";
+ private final static String COMPRESSED_OUTPUT_MD5 = "af8187e2baf516dde1cddea787a52b8a";
@Test
public void testCompressedOutput() {
@@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
- Arrays.asList("50329e15e5139be9e3b643f0b3ba8a53"));
+ Arrays.asList("f6f8fbf733f20fbc1dd9ebaf8faefe6c"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
- Arrays.asList("2b85e3bd6bf981afaf7324666740d74b"));
+ Arrays.asList("4438ad0f03bbdd182d9bb59b15af0fa5"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
- Arrays.asList("a6fd46eff78827060451a62cffd698a7"));
+ Arrays.asList("27b4ace2ad5a83d8cccb040f97f29183"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
- Arrays.asList("69df7a00f800204564ca3726e1871132"));
+ Arrays.asList("d3d518448b01bf0f751824b3d946cd04"));
List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
- Arrays.asList("1256a7eceff2c2374c231ff981df486d"));
+ Arrays.asList("2ea18a3e8480718a80a415d3fea79f54"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@@ -462,7 +462,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testReducedBamINDELs() {
- testReducedCalling("INDEL", "3c02ee5187933bed44dc416a2e28511f");
+ testReducedCalling("INDEL", "9d5418ddf1b227ae4d463995507f2b1c");
}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java
index 556b7451f..1b0ffbd26 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java
@@ -4,8 +4,8 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.VariantContext;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
index ee5436264..4753ad07a 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
@@ -4,7 +4,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
index 7ee909fe0..6b53d6188 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
@@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java
index 3df2f7883..0f2dc84dc 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java
@@ -2,10 +2,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.Genotype;
-import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
-import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.Genotype;
+import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
+import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
index 663471106..1293c274b 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
@@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.Genotype;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.Genotype;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
index 07e7b0d92..f44624a10 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java
@@ -10,9 +10,9 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index a80137c27..8422d856e 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -21,30 +21,30 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
- HCTest(CEUTRIO_BAM, "", "d602d40852ad6d2d094be07e60cf95bd");
+ HCTest(CEUTRIO_BAM, "", "839de31b41d4186e2b12a5601525e894");
}
@Test
public void testHaplotypeCallerSingleSample() {
- HCTest(NA12878_BAM, "", "70ad9d53dda4d302b879ca2b7dd5b368");
+ HCTest(NA12878_BAM, "", "2b68faa0e0493d92491d74b8f731963a");
}
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
- "fe84caa79f59ecbd98fcbcd5b30ab164");
+ "a2d56179cd19a41f8bfb995e225320bb");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
- final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 2";
+ final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
}
@Test
public void testHaplotypeCallerMultiSampleComplex() {
- HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "883871f8bb4099f69fd804f8a6181954");
+ HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd8d2ae8db9d98e932b0a7f345631eec");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
- HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "338ab3b7dc3d54df8af94c0811028a75");
+ HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
- HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "aff11b014ca42bfa301bcced5f5e54dd");
+ HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6380e25c1ec79c6ae2f891ced15bf4e1");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
- final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2f4ed6dc969bee041215944a9b24328f"));
+ final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3a096d6139d15dcab82f5b091d08489d"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
- final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("d8d6f2ebe79bca81c8a0911daa153b89"));
+ final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a518c7436544f2b5f71c9d9427ce1cce"));
executeTest("HCTestStructuralIndels: ", spec);
}
@@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
- Arrays.asList("d01cb5f77ed5aca1d228cfbce9364c21"));
+ Arrays.asList("8a400b0c46f41447fcc35a907e34f384"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java
index 5652b118d..e0b8cf466 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java
@@ -10,16 +10,11 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider;
import org.broadinstitute.sting.utils.Haplotype;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.jgrapht.graph.DefaultDirectedGraph;
import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import java.io.File;
-import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
@@ -146,44 +141,6 @@ public class SimpleDeBruijnAssemblerUnitTest extends BaseTest {
Assert.assertTrue(graphEquals(graph, expectedGraph));
}
- @Test(enabled=false)
-// not ready yet
- public void testBasicGraphCreation() {
- final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
- final byte refBase = refPileupTestProvider.getReferenceContext().getBase();
- final String altBase = (refBase==(byte)'A'?"C":"A");
- final int matches = 50;
- final int mismatches = 50;
- Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30);
- PrintStream graphWriter = null;
-
- try{
- graphWriter = new PrintStream("du.txt");
- } catch (Exception e) {}
-
-
- SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter);
- final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases());
- refHaplotype.setIsReference(true);
- assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype);
-
-/* // clean up the graphs by pruning and merging
- for( final DefaultDirectedGraph graph : graphs ) {
- SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR );
- //eliminateNonRefPaths( graph );
- SimpleDeBruijnAssembler.mergeNodes( graph );
- }
- */
- if( graphWriter != null ) {
- assembler.printGraphs();
- }
-
- int k=2;
-
- // find the best paths in the graphs
- // return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() );
-
- }
@Test(enabled = true)
public void testEliminateNonRefPaths() {
DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class);
diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java
index 6281054b1..4684d6d8a 100644
--- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java
@@ -29,7 +29,7 @@ package org.broadinstitute.sting.utils.pairhmm;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
diff --git a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java
index c63f5615f..db21411b3 100644
--- a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java
+++ b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java
@@ -1,7 +1,7 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.*;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java
index b903b9f7d..ef50cf088 100644
--- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java
+++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java
@@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java
index 2d568a96a..67305ce78 100644
--- a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java
@@ -4,7 +4,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.alignment.Aligner;
import org.broadinstitute.sting.alignment.Alignment;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java
index fbeac9192..f29e639d6 100644
--- a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java
+++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java
@@ -6,7 +6,7 @@ import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.reference.bwt.*;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import java.io.File;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
index e0c2ce72a..5a1c2a1b4 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java
@@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContext;
/**
* @author ebanks
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
index 547f375bb..f8f1831f6 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java
@@ -4,7 +4,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.File;
import java.io.PrintStream;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java
index 4c0257e6a..72b01df01 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java
@@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContext;
/**
* @author ebanks
diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
index 34627b973..88f4166ef 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
@@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.contexts;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
-import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
index 73301c511..8f5f420fd 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
@@ -23,17 +23,17 @@
*/
package org.broadinstitute.sting.gatk.datasources.reads;
+import org.broad.tribble.util.SeekableBufferedStream;
+import org.broad.tribble.util.SeekableFileStream;
+
import net.sf.samtools.*;
-import org.broadinstitute.sting.gatk.CommandLineGATK;
+
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
+import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
-import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -68,6 +68,9 @@ public class GATKBAMIndex {
private final File mFile;
+ //TODO: figure out a good value for this buffer size
+ private final int BUFFERED_STREAM_BUFFER_SIZE=8192;
+
/**
* Number of sequences stored in this index.
*/
@@ -78,8 +81,8 @@ public class GATKBAMIndex {
*/
private final long[] sequenceStartCache;
- private FileInputStream fileStream;
- private FileChannel fileChannel;
+ private SeekableFileStream fileStream;
+ private SeekableBufferedStream bufferedStream;
public GATKBAMIndex(final File file) {
mFile = file;
@@ -277,7 +280,6 @@ public class GATKBAMIndex {
for (int i = sequenceIndex; i < referenceSequence; i++) {
sequenceStartCache[i] = position();
-
// System.out.println("# Sequence TID: " + i);
final int nBins = readInteger();
// System.out.println("# nBins: " + nBins);
@@ -290,15 +292,18 @@ public class GATKBAMIndex {
final int nLinearBins = readInteger();
// System.out.println("# nLinearBins: " + nLinearBins);
skipBytes(8 * nLinearBins);
+
}
sequenceStartCache[referenceSequence] = position();
}
+
+
private void openIndexFile() {
try {
- fileStream = new FileInputStream(mFile);
- fileChannel = fileStream.getChannel();
+ fileStream = new SeekableFileStream(mFile);
+ bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE);
}
catch (IOException exc) {
throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc);
@@ -307,7 +312,7 @@ public class GATKBAMIndex {
private void closeIndexFile() {
try {
- fileChannel.close();
+ bufferedStream.close();
fileStream.close();
}
catch (IOException exc) {
@@ -352,7 +357,12 @@ public class GATKBAMIndex {
private void read(final ByteBuffer buffer) {
try {
int bytesExpected = buffer.limit();
- int bytesRead = fileChannel.read(buffer);
+ //BufferedInputStream cannot read directly into a byte buffer, so we read into an array
+ //and put the result into the bytebuffer after the if statement.
+
+ //SeekableBufferedStream is evil, it will "read" beyond the end of the file if you let it!
+ final int bytesToRead = (int) Math.min(bufferedStream.length() - bufferedStream.position(), bytesExpected); //min of int and long will definitely be castable to an int.
+ int bytesRead = bufferedStream.read(byteArray,0,bytesToRead);
// We have a rigid expectation here to read in exactly the number of bytes we've limited
// our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index
@@ -363,6 +373,7 @@ public class GATKBAMIndex {
"Please try re-indexing the corresponding BAM file.",
mFile));
}
+ buffer.put(byteArray,0,bytesRead);
}
catch(IOException ex) {
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
@@ -376,10 +387,13 @@ public class GATKBAMIndex {
*/
private ByteBuffer buffer = null;
+ //BufferedStreams don't read into ByteBuffers, so we need this temporary array
+ private byte[] byteArray=null;
private ByteBuffer getBuffer(final int size) {
if(buffer == null || buffer.capacity() < size) {
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
buffer = ByteBuffer.allocate(size);
+ byteArray = new byte[size];
buffer.order(ByteOrder.LITTLE_ENDIAN);
}
buffer.clear();
@@ -389,7 +403,13 @@ public class GATKBAMIndex {
private void skipBytes(final int count) {
try {
- fileChannel.position(fileChannel.position() + count);
+
+ //try to skip forward the requested amount.
+ long skipped = bufferedStream.skip(count);
+
+ if( skipped != count ) { //if not managed to skip the requested amount
+ throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
+ }
}
catch(IOException ex) {
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
@@ -398,7 +418,8 @@ public class GATKBAMIndex {
private void seek(final long position) {
try {
- fileChannel.position(position);
+ //to seek a new position, reposition the bufferedStream directly (there is no longer a separate fileChannel to move)
+ bufferedStream.seek(position);
}
catch(IOException ex) {
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
@@ -411,7 +432,7 @@ public class GATKBAMIndex {
*/
private long position() {
try {
- return fileChannel.position();
+ return bufferedStream.position();
}
catch (IOException exc) {
throw new ReviewedStingException("Unable to read position from index file " + mFile, exc);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index e99814278..5c932fdce 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File;
@@ -252,9 +253,10 @@ public class SAMDataSource {
if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize); // TODO: use of non-final static variable here is just awful, especially for parallel tests
else {
- // Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively
- // will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
- ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000));
+ // Choose a sensible default for the read buffer size.
+ // Previously we picked 10000 reads per BAM per shard with a max cap of 250K reads in memory at once.
+ // Now we are simply setting it to 100K reads
+ ReadShard.setReadBufferSize(100000);
}
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
@@ -894,9 +896,11 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
+
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList());
}
+
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}
@@ -916,6 +920,13 @@ public class SAMDataSource {
for(SAMFileReader reader: readers.values())
headers.add(reader.getFileHeader());
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
+
+ // update all read groups to GATKSAMReadGroupRecords
+ final List gatkReadGroups = new LinkedList();
+ for ( final SAMReadGroupRecord rg : headerMerger.getMergedHeader().getReadGroups() ) {
+ gatkReadGroups.add(new GATKSAMReadGroupRecord(rg));
+ }
+ headerMerger.getMergedHeader().setReadGroups(gatkReadGroups);
}
final private void printReaderPerformance(final int nExecutedTotal,
diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java
index 5275c471e..b4161b06e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java
@@ -104,6 +104,56 @@ public class PerSampleDownsamplingReadsIterator implements StingSAMIterator {
readComparator.compare(orderedDownsampledReadsCache.peek(), earliestPendingRead) <= 0;
}
+ private boolean fillDownsampledReadsCache() {
+ SAMRecord prevRead = null;
+ int numPositionalChanges = 0;
+
+ // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue
+ // can be released without violating global sort order
+ while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) {
+ SAMRecord read = nestedSAMIterator.next();
+ String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
+
+ ReadsDownsampler thisSampleDownsampler = perSampleDownsamplers.get(sampleName);
+ if ( thisSampleDownsampler == null ) {
+ thisSampleDownsampler = downsamplerFactory.newInstance();
+ perSampleDownsamplers.put(sampleName, thisSampleDownsampler);
+ }
+
+ thisSampleDownsampler.submit(read);
+ processFinalizedAndPendingItems(thisSampleDownsampler);
+
+ if ( prevRead != null && prevRead.getAlignmentStart() != read.getAlignmentStart() ) {
+ numPositionalChanges++;
+ }
+
+ // Periodically inform all downsamplers of the current position in the read stream. This is
+ // to prevent downsamplers for samples with sparser reads than others from getting stuck too
+ // long in a pending state.
+ if ( numPositionalChanges > 0 && numPositionalChanges % DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL == 0 ) {
+ for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) {
+ perSampleDownsampler.signalNoMoreReadsBefore(read);
+ processFinalizedAndPendingItems(perSampleDownsampler);
+ }
+ }
+
+ prevRead = read;
+ }
+
+ if ( ! nestedSAMIterator.hasNext() ) {
+ for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) {
+ perSampleDownsampler.signalEndOfInput();
+ if ( perSampleDownsampler.hasFinalizedItems() ) {
+ orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems());
+ }
+ }
+ earliestPendingRead = null;
+ earliestPendingDownsampler = null;
+ }
+
+ return readyToReleaseReads();
+ }
+
private void updateEarliestPendingRead( ReadsDownsampler currentDownsampler ) {
// If there is no recorded earliest pending read and this downsampler has pending items,
// then this downsampler's first pending item becomes the new earliest pending read:
@@ -135,57 +185,11 @@ public class PerSampleDownsamplingReadsIterator implements StingSAMIterator {
}
}
- private boolean fillDownsampledReadsCache() {
- SAMRecord prevRead = null;
- int numPositionalChanges = 0;
-
- // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue
- // can be released without violating global sort order
- while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) {
- SAMRecord read = nestedSAMIterator.next();
- String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
-
- ReadsDownsampler thisSampleDownsampler = perSampleDownsamplers.get(sampleName);
- if ( thisSampleDownsampler == null ) {
- thisSampleDownsampler = downsamplerFactory.newInstance();
- perSampleDownsamplers.put(sampleName, thisSampleDownsampler);
- }
-
- thisSampleDownsampler.submit(read);
- updateEarliestPendingRead(thisSampleDownsampler);
-
- if ( prevRead != null && prevRead.getAlignmentStart() != read.getAlignmentStart() ) {
- numPositionalChanges++;
- }
-
- // Periodically inform all downsamplers of the current position in the read stream. This is
- // to prevent downsamplers for samples with sparser reads than others from getting stuck too
- // long in a pending state.
- if ( numPositionalChanges > 0 && numPositionalChanges % DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL == 0 ) {
- for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) {
- perSampleDownsampler.signalNoMoreReadsBefore(read);
- updateEarliestPendingRead(perSampleDownsampler);
- }
- }
-
- prevRead = read;
+ private void processFinalizedAndPendingItems( ReadsDownsampler currentDownsampler ) {
+ if ( currentDownsampler.hasFinalizedItems() ) {
+ orderedDownsampledReadsCache.addAll(currentDownsampler.consumeFinalizedItems());
}
-
- if ( ! nestedSAMIterator.hasNext() ) {
- for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) {
- perSampleDownsampler.signalEndOfInput();
- }
- earliestPendingRead = null;
- earliestPendingDownsampler = null;
- }
-
- for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) {
- if ( perSampleDownsampler.hasFinalizedItems() ) {
- orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems());
- }
- }
-
- return readyToReleaseReads();
+ updateEarliestPendingRead(currentDownsampler);
}
public void remove() {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java
index 8ad91ac1c..a1f2a877b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java
@@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
/**
@@ -37,6 +38,6 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
public class Platform454Filter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
- return (ReadUtils.is454Read(rec));
+ return (ReadUtils.is454Read((GATKSAMRecord)rec));
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
index 8e241bb2c..de5be94bc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
@@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
/**
@@ -41,7 +42,7 @@ public class PlatformFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
for ( String name : PLFilterNames )
- if ( ReadUtils.isPlatformRead(rec, name.toUpperCase() ))
+ if ( ReadUtils.isPlatformRead((GATKSAMRecord)rec, name.toUpperCase() ))
return true;
return false;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
index 8e4633869..2a7c5c7b2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
@@ -30,14 +30,14 @@ import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
-import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.variant.bcf2.BCF2Utils;
+import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.writer.Options;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.writer.Options;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory;
import java.io.*;
import java.util.Arrays;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
index f521c959d..8a989b040 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java
@@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
index f92d78bb5..c512ba835 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
@@ -29,13 +29,13 @@ import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.writer.Options;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
+import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
+import org.broadinstitute.variant.vcf.VCFHeader;
+import org.broadinstitute.variant.vcf.VCFHeaderLine;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.writer.Options;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
+import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.OutputStream;
@@ -232,7 +232,7 @@ public class VariantContextWriterStub implements Stub, Var
}
if ( UPDATE_CONTIG_HEADERS )
- vcfHeader = VCFUtils.withUpdatedContigs(vcfHeader, engine);
+ vcfHeader = GATKVCFUtils.withUpdatedContigs(vcfHeader, engine);
}
outputTracker.getStorage(this).writeHeader(vcfHeader);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
index 51fed470f..2bc14aa69 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@@ -131,7 +131,7 @@ public class GATKRunReport {
private String hostName;
@Element(required = true, name = "java")
- private String java;
+ private String javaVersion;
@Element(required = true, name = "machine")
private String machine;
@@ -212,7 +212,7 @@ public class GATKRunReport {
hostName = Utils.resolveHostname();
// basic java information
- java = Utils.join("-", Arrays.asList(System.getProperty("java.vendor"), System.getProperty("java.version")));
+ javaVersion = Utils.join("-", Arrays.asList(System.getProperty("java.vendor"), System.getProperty("java.version")));
machine = Utils.join("-", Arrays.asList(System.getProperty("os.name"), System.getProperty("os.arch")));
// if there was an exception, capture it
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
index 5c7da82d0..8713e9797 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java
@@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
-import org.broadinstitute.sting.utils.variantcontext.*;
+import org.broadinstitute.variant.variantcontext.*;
import java.util.*;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
index a2fe94641..0e0e8017d 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
@@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.classloader.PluginManager;
-import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.variant.vcf.AbstractVCFCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
index 81fe73075..2bb6cbeee 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
@@ -39,7 +39,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index 605a6680f..e69924930 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -117,7 +117,7 @@ public class GATKReport {
* @param numColumns the number of columns in this table
*/
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
- addTable(tableName, tableDescription, numColumns, false);
+ addTable(tableName, tableDescription, numColumns, false, false);
}
/**
@@ -127,9 +127,10 @@ public class GATKReport {
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort the rows by the row ID
+ * @param sortByAllColumns whether to sort the rows by all columns, starting from the leftmost column
*/
- public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID) {
- GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID);
+ public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
+ GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns);
tables.put(tableName, table);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
index 3b4bdd087..2bf7c9609 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
@@ -47,6 +47,7 @@ public class GATKReportTable {
private final String tableDescription;
private final boolean sortByRowID;
+ private final boolean sortByAllColumns;
private List