diff --git a/build.xml b/build.xml index 834aef3cd..47e4eeb47 100644 --- a/build.xml +++ b/build.xml @@ -107,6 +107,12 @@ + + + + + + @@ -267,19 +273,19 @@ - - + + + + + + - - + + + + + + @@ -596,6 +602,7 @@ + - - - - - - + + @@ -659,7 +662,7 @@ - + @@ -667,14 +670,10 @@ - - + + - - - - - + @@ -685,7 +684,7 @@ - + @@ -703,12 +702,12 @@ - - + + - - + + @@ -719,7 +718,7 @@ - + @@ -755,7 +754,7 @@ - + @@ -1098,7 +1097,6 @@ - @@ -1129,6 +1127,7 @@ + @@ -1136,6 +1135,21 @@ + + + + + + + + + + + + + + + @@ -1215,6 +1229,7 @@ + @@ -1228,10 +1243,11 @@ listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter"> + - + @@ -1270,6 +1286,7 @@ + @@ -1382,6 +1399,7 @@ + diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index 94f6ff649..53f206bfe 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -30,7 +30,8 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.variantcontext.Allele; import java.io.PrintStream; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index d0bcd0eb3..255f1fd05 100644 --- 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -25,51 +25,46 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; * OTHER DEALINGS IN THE SOFTWARE. */ -import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; -import org.broadinstitute.sting.utils.BaseUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; -import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.recalibration.ReadCovariates; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.recalibration.RecalDatum; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.threading.ThreadLocalArray; + +import java.util.LinkedList; +import java.util.List; public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource { + private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class); - // optimization: only allocate temp arrays once per thread - private final ThreadLocal threadLocalTempQualArray = new ThreadLocalArray(EventType.values().length, byte.class); - private final ThreadLocal threadLocalTempFractionalErrorArray = new ThreadLocalArray(EventType.values().length, double.class); - - public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { - super.initialize(covariates, recalibrationTables); - } + final List> allThreadLocalQualityScoreTables = new LinkedList>(); + private ThreadLocal> threadLocalQualityScoreTables = new ThreadLocal>() { + @Override + protected synchronized NestedIntegerArray 
initialValue() { + final NestedIntegerArray table = recalibrationTables.makeQualityScoreTable(); + allThreadLocalQualityScoreTables.add(table); + return table; + } + }; @Override - public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) { + public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) { + final GATKSAMRecord read = recalInfo.getRead(); + final ReadCovariates readCovariates = recalInfo.getCovariatesValues(); + final NestedIntegerArray qualityScoreTable = getThreadLocalQualityScoreTable(); + for( int offset = 0; offset < read.getReadBases().length; offset++ ) { - if( !skip[offset] ) { - final ReadCovariates readCovariates = covariateKeySetFrom(read); - - byte[] tempQualArray = threadLocalTempQualArray.get(); - double[] tempFractionalErrorArray = threadLocalTempFractionalErrorArray.get(); - - tempQualArray[EventType.BASE_SUBSTITUTION.index] = read.getBaseQualities()[offset]; - tempFractionalErrorArray[EventType.BASE_SUBSTITUTION.index] = snpErrors[offset]; - tempQualArray[EventType.BASE_INSERTION.index] = read.getBaseInsertionQualities()[offset]; - tempFractionalErrorArray[EventType.BASE_INSERTION.index] = insertionErrors[offset]; - tempQualArray[EventType.BASE_DELETION.index] = read.getBaseDeletionQualities()[offset]; - tempFractionalErrorArray[EventType.BASE_DELETION.index] = deletionErrors[offset]; + if( ! 
recalInfo.skip(offset) ) { for (final EventType eventType : EventType.values()) { final int[] keys = readCovariates.getKeySet(offset, eventType); final int eventIndex = eventType.index; - final byte qual = tempQualArray[eventIndex]; - final double isError = tempFractionalErrorArray[eventIndex]; + final byte qual = recalInfo.getQual(eventType, offset); + final double isError = recalInfo.getErrorFraction(eventType, offset); - combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex); - - incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex); + incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex); for (int i = 2; i < covariates.length; i++) { if (keys[i] < 0) @@ -81,4 +76,24 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp } } } + + /** + * Get a NestedIntegerArray for a QualityScore table specific to this thread + * @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate + */ + private NestedIntegerArray getThreadLocalQualityScoreTable() { + return threadLocalQualityScoreTables.get(); + } + + @Override + public void finalizeData() { + // merge in all of the thread local tables + logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables"); + for ( final NestedIntegerArray localTable : allThreadLocalQualityScoreTables ) { + recalibrationTables.combineQualityScoreTable(localTable); + } + allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves + + super.finalizeData(); + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index 3097c2ee9..bebc27221 100644 --- 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -39,16 +39,27 @@ public class HeaderElement { * * @param location the reference location for the new element */ - public HeaderElement(int location) { + public HeaderElement(final int location) { this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location, new LinkedList()); } + /** + * Creates a new HeaderElement with the following default values: - empty consensusBaseCounts - empty + * filteredBaseCounts - empty mappingQuality list + * + * @param location the reference location for the new element + */ + public HeaderElement(final int location, final int insertionsToTheRight) { + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location, new LinkedList()); + } + /** * Creates a new HeaderElement with all given parameters * * @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read * @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read * @param insertionsToTheRight number of insertions to the right of this HeaderElement + * @param nSoftClippedBases number of softclipped bases of this HeaderElement * @param location the reference location of this reference element * @param mappingQuality the list of mapping quality values of all reads that contributed to this * HeaderElement @@ -151,6 +162,14 @@ public class HeaderElement { throw new ReviewedStingException("Removed too many insertions, header is now negative!"); } + public boolean hasInsertionToTheRight() { + return insertionsToTheRight > 0; + } + + public int numInsertionsToTheRight() { + return insertionsToTheRight; + } + /** * Whether or not the HeaderElement is variant due to excess insertions * diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 2061c5364..39a284d98 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -300,7 +300,7 @@ public class ReduceReads extends ReadWalker, ReduceRea // Check if the read goes beyond the boundaries of the chromosome, and hard clip those boundaries. int chromosomeLength = ref.getGenomeLocParser().getContigInfo(read.getReferenceName()).getSequenceLength(); if (read.getSoftStart() < 0) - read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart() - 1); + read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart()); if (read.getSoftEnd() > chromosomeLength) read = ReadClipper.hardClipByReadCoordinates(read, chromosomeLength - read.getSoftStart() + 1, read.getReadLength() - 1); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index fff1c20a5..9af54b4a8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -645,8 +645,15 @@ public class SlidingWindow { } } - for (int i = 0; i <= lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion) - windowHeader.remove(); + // clean up the window header elements up until the end of the variant region. 
+ // note that we keep the last element of the region in the event that the following element has a read that starts with insertion. + if ( lastStop >= 0 ) { + for (int i = 0; i < lastStop; i++) + windowHeader.remove(); + final HeaderElement lastOfRegion = windowHeader.remove(); + if ( lastOfRegion.hasInsertionToTheRight() ) + windowHeader.addFirst(new HeaderElement(lastOfRegion.getLocation(), lastOfRegion.numInsertionsToTheRight())); + } } return allReads; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java index fc6d23382..30650e9c0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -8,8 +8,8 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java index 303ab94d6..502853868 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java @@ -29,13 +29,13 @@ import net.sf.samtools.SAMUtils; import 
org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java index f6ad445c7..ce66ce8d0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -31,11 +31,11 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import 
org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java index 4bcaa5ff9..3b12fe475 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java @@ -8,7 +8,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java index eb4cf1839..76a934091 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java index 0f0f85441..461329ad0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoods.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.ArrayList; import java.util.Arrays; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java index 9f2fdc096..3250de2b2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidySNPGenotypeLikelihoodsCalculationModel.java @@ -31,7 +31,8 @@ import 
org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java index 0a3512aa6..d6b3eb768 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java @@ -10,10 +10,10 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import java.io.*; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java index 6f3740ab3..f4f17f1e0 100644 --- 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcTestBuilder.java @@ -4,7 +4,7 @@ import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.ArrayList; import java.util.Arrays; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java index b248c8759..5f5d80fde 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java @@ -27,9 +27,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; @@ -532,7 +533,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { } // if there is no mass on the (new) likelihoods, then just no-call the sample - if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) { + if ( MathUtils.sum(newLikelihoods) > 
GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) { newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); } else { @@ -544,7 +545,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { gb.PL(newLikelihoods); // if we weren't asked to assign a genotype, then just no-call the sample - if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) + if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) gb.alleles(NO_CALL_ALLELES); else assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 4d81d0010..5aef002fe 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -36,7 +36,9 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.variantcontext.*; import java.io.PrintStream; import java.util.*; @@ -44,13 +46,15 @@ import java.util.*; public class GenotypingEngine { private final boolean DEBUG; + private final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS; private final static List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("", false); private final 
VariantAnnotatorEngine annotationEngine; - public GenotypingEngine( final boolean DEBUG, final VariantAnnotatorEngine annotationEngine ) { + public GenotypingEngine( final boolean DEBUG, final VariantAnnotatorEngine annotationEngine, final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ) { this.DEBUG = DEBUG; this.annotationEngine = annotationEngine; + this.USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = USE_FILTERED_READ_MAP_FOR_ANNOTATIONS; noCall.add(Allele.NO_CALL); } @@ -156,7 +160,7 @@ public class GenotypingEngine { } // Merge the event to find a common reference representation - final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); + final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); if( mergedVC == null ) { continue; } // let's update the Allele keys in the mapper because they can change after merging when there are complex events @@ -192,11 +196,13 @@ public class GenotypingEngine { } final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel); if( call != null ) { - final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap, perSampleFilteredReadList, call ); + final Map alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? 
alleleReadMap : + convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0, UG_engine.getUAC().contaminationLog ) ); + final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call); if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! - annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall); + annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall); } returnCalls.add( annotatedCall ); @@ -339,12 +345,7 @@ public class GenotypingEngine { } // count up the co-occurrences of the events for the R^2 calculation for( final String sample : samples ) { - final HashSet sampleSet = new HashSet(1); - sampleSet.add(sample); - - final List alleleList = new ArrayList(); - alleleList.add(Allele.create(h.getBases())); - final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet, haplotypeReadMap, alleleList )[0][0]; + final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h.getBases())) )[0][0]; if( thisHapVC == null ) { if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); } else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 35aa86ca2..8c8113f46 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -48,8 +48,8 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.clipping.ReadClipper; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fragments.FragmentCollection; @@ -61,8 +61,8 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.FileNotFoundException; import java.io.PrintStream; @@ -130,18 +130,26 @@ public class HaplotypeCaller extends ActiveRegionWalker implem protected String keepRG = null; @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. 
Paths with <= X supporting kmers are pruned from the graph", required = false) - protected int MIN_PRUNE_FACTOR = 1; + protected int MIN_PRUNE_FACTOR = 2; @Advanced @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false) protected int gcpHMM = 10; + @Advanced + @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) + protected int minKmer = 11; + @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000; @Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false) protected boolean USE_ALLELES_TRIGGER = false; + @Advanced + @Argument(fullName="useFilteredReadsForAnnotations", shortName="useFilteredReadsForAnnotations", doc = "If specified, use the contamination-filtered read maps for the purposes of annotating variants", required=false) + protected boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS = false; + /** * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. * dbSNP is not used in any way for the calculations themselves. 
@@ -234,7 +242,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem samplesList.addAll( samples ); // initialize the UnifiedGenotyper Engine which is used to call into the exact model final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user - UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC); @@ -244,7 +252,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.CONTAMINATION_FRACTION = 0.0; simpleUAC.exactCallsLog = null; - UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); // initialize the output VCF header final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); @@ -278,9 +286,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e); } - assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter ); + assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, 
graphWriter, minKmer ); likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM ); - genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine ); + genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java index 8a401439b..0e4673497 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -39,18 +39,18 @@ import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.SWPairwiseAlignment; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import 
org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.util.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 018102893..59f241cdb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -29,16 +29,13 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pairhmm.*; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; -import java.io.PrintStream; import java.util.*; public class LikelihoodCalculationEngine { diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index bf6c82d82..6d5d268a6 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index 3c5a1f79c..0a98f54e9 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -11,8 +11,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import org.jgrapht.graph.DefaultDirectedGraph; import java.io.PrintStream; @@ -28,7 +28,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { private 
static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11; - private static final byte MIN_QUALITY = (byte) 17; + private static final byte MIN_QUALITY = (byte) 16; // Smith-Waterman parameters originally copied from IndelRealigner private static final double SW_MATCH = 5.0; // 1.0; @@ -39,13 +39,15 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { private final boolean DEBUG; private final PrintStream GRAPH_WRITER; private final ArrayList> graphs = new ArrayList>(); + private final int MIN_KMER; - private int PRUNE_FACTOR = 1; + private int PRUNE_FACTOR = 2; - public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter ) { + public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter, final int minKmer ) { super(); DEBUG = debug; GRAPH_WRITER = graphWriter; + MIN_KMER = minKmer; } public ArrayList runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList activeAllelesToGenotype ) { @@ -72,8 +74,9 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { protected void createDeBruijnGraphs( final List reads, final Haplotype refHaplotype ) { graphs.clear(); + final int maxKmer = refHaplotype.getBases().length; // create the graph - for( int kmer = 31; kmer <= 75; kmer += 6 ) { + for( int kmer = MIN_KMER; kmer <= maxKmer; kmer += 6 ) { final DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class); if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) { graphs.add(graph); diff --git a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java index 77a7c3bd9..4a13fb615 
100644 --- a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java +++ b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java @@ -29,7 +29,7 @@ import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils; import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.io.PrintStream; import java.util.*; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index b15969fba..177a989fb 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -38,7 +38,8 @@ public class BQSRIntegrationTest extends WalkerTest { args + " -knownSites " + (reference.equals(b36KGReference) ? 
b36dbSNP129 : hg18dbSNP132) + " --allow_potentially_misencoded_quality_scores" + // TODO -- remove me when we get new SOLiD bams - " -o %s"; + " -o %s" + + " -sortAllCols"; } @Override @@ -52,21 +53,21 @@ public class BQSRIntegrationTest extends WalkerTest { String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqInterval = "chr1:10,000,000-10,100,000"; return new Object[][]{ - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "4fd3c9ad97e6ac58cba644a76564c9f7")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "2620f734cce20f70ce13afd880e46e5c")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "5eb3b94e767da19a4c037ee132e4b19a")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "ab261d291b107a3da7897759c0e4fa89")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "292303f649fbb19dc05d4a0197a49eeb")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "8ced9d1094493f17fb1876b818a64541")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "abb838131e403d39820dbd66932d1ed0")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "f70d8b5358bc2f76696f14b7a807ede0")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4c0f63e06830681560a1e9f9aad9fe98")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "8f62aa0e75770204c98d8299793cc53c")}, - {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "03c29a0c1d21f72b12daf51cec111599")}, - {new BQSRTest(b36KGReference, validationDataLocation + 
"NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "7080b2cad02ec6e67ebc766b2dccebf8")}, - {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "30e76055c16843b6e33e5b9bd8ced57c")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "f70d8b5358bc2f76696f14b7a807ede0")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5e657fd6a44dcdc7674b6e5a2de5dc83")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "2f250fecb930e0dfe0f63fe0fed3960b")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "26c8d7226139a040557b1d3b1c8792f0")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "9b43a1839cb6ea03aec1d96f15ca8efb")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "3159a9d136c45e4a65d46a23dc8fd3b5")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "bb7262829effbbdbc8d88dd36f480368")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "fbb002fa2b9197c4b555852dccc11562")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "7392acb71131a60a527ca32715fc59be")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "49d4383896a90795d94138db1410a7df")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", 
"427448eff98cf194cc7217c0b1401e79")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "50cd1a10b6ecb3d09f90f1e4a66da95d")}, + {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "1dc71561c9d0fb56f9876cb5043c5376")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "13e8f032e76340b114847c90af0a1f8a")}, + {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "03f58ae4f9d203034e895a3636fc108f")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "49d4383896a90795d94138db1410a7df")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2db2ef8c2d63e167663d70340182f49a")}, }; } @@ -100,6 +101,7 @@ public class BQSRIntegrationTest extends WalkerTest { " -knownSites " + b36dbSNP129 + " -L 1:10,000,000-10,200,000" + " -o /dev/null" + + " -sortAllCols" + " --plot_pdf_file /dev/null" + " --intermediate_csv_file %s", Arrays.asList("d1c38a3418979400630e2bca1140689c")); @@ -114,7 +116,8 @@ public class BQSRIntegrationTest extends WalkerTest { " -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" + " -L 1:50,000-80,000" + " --allow_potentially_misencoded_quality_scores" + // TODO -- remove me when we get new SOLiD bams - " -o %s", + " -o %s" + + " -sortAllCols", 1, // just one output file UserException.class); executeTest("testBQSRFailWithSolidNoCall", spec); diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 1e539dc9d..7e662d3b2 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -17,6 +17,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest { final String COREDUCTION_BAM_A = validationDataLocation + "coreduction.test.A.bam"; final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam"; final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057"; + final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam"; + final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam"; private void RRTest(String testName, String args, String md5) { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s "; @@ -29,6 +31,12 @@ public class ReduceReadsIntegrationTest extends WalkerTest { RRTest("testDefaultCompression ", L, "98080d3c53f441564796fc143cf510da"); } + @Test(enabled = true) + public void testInsertionsAtEdgeOfConsensus() { + String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s "; + executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("2a6e08a0206bd8ec7671224c4a55dae0"))); + } + @Test(enabled = true) public void testMultipleIntervals() { String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; @@ -86,5 +94,15 @@ public class ReduceReadsIntegrationTest extends 
WalkerTest { executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("5c30fde961a1357bf72c15144c01981b"))); } + /** + * Bug happens when reads are soft-clipped off the contig (usually in the MT). This test guarantees no changes to the upstream code will + * break the current hard-clipping routine that protects reduce reads from such reads. + */ + @Test(enabled = true) + public void testReadOffContig() { + String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s "; + executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("2f17c1a78e9d0138217fdb83cede8f68"))); + } + } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java index f95ba66b2..4d4dbbdb5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java @@ -28,11 +28,11 @@ import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index f26194e00..cdd31a5ef 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -70,12 +70,12 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","6987b89e04dcb604d3743bb09aa9587d"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d0780f70365ed1b431099fd3b4cec449"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index c768f95ad..a8ba92634 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam 
-o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("97df6c2a8d390d43b9bdf56c979d9b09")); + Arrays.asList("b41b95aaa2c453c9b75b3b29a9c2718e")); executeTest("test Multiple SNP alleles", spec); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "3eba6c309514d1e9ee06a20a112b68e6"; + private final static String COMPRESSED_OUTPUT_MD5 = "af8187e2baf516dde1cddea787a52b8a"; @Test public void testCompressedOutput() { @@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("50329e15e5139be9e3b643f0b3ba8a53")); + Arrays.asList("f6f8fbf733f20fbc1dd9ebaf8faefe6c")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("2b85e3bd6bf981afaf7324666740d74b")); + Arrays.asList("4438ad0f03bbdd182d9bb59b15af0fa5")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("a6fd46eff78827060451a62cffd698a7")); + Arrays.asList("27b4ace2ad5a83d8cccb040f97f29183")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("69df7a00f800204564ca3726e1871132")); + 
Arrays.asList("d3d518448b01bf0f751824b3d946cd04")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("1256a7eceff2c2374c231ff981df486d")); + Arrays.asList("2ea18a3e8480718a80a415d3fea79f54")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -462,7 +462,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "3c02ee5187933bed44dc416a2e28511f"); + testReducedCalling("INDEL", "9d5418ddf1b227ae4d463995507f2b1c"); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java index 556b7451f..1b0ffbd26 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java index ee5436264..4753ad07a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java index 7ee909fe0..6b53d6188 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java @@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java index 3df2f7883..0f2dc84dc 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyAFCalculationModelUnitTest.java @@ -2,10 +2,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.GenotypesContext; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java index 663471106..1293c274b 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java @@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import 
org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 07e7b0d92..f44624a10 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -10,9 +10,9 @@ import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index a80137c27..8422d856e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,30 +21,30 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "d602d40852ad6d2d094be07e60cf95bd"); + HCTest(CEUTRIO_BAM, "", "839de31b41d4186e2b12a5601525e894"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "70ad9d53dda4d302b879ca2b7dd5b368"); + HCTest(NA12878_BAM, "", "2b68faa0e0493d92491d74b8f731963a"); } // TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "fe84caa79f59ecbd98fcbcd5b30ab164"); + "a2d56179cd19a41f8bfb995e225320bb"); } private void HCTestComplexVariants(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 2"; + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); 
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec); } @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "883871f8bb4099f69fd804f8a6181954"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd8d2ae8db9d98e932b0a7f345631eec"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "338ab3b7dc3d54df8af94c0811028a75"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "aff11b014ca42bfa301bcced5f5e54dd"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6380e25c1ec79c6ae2f891ced15bf4e1"); } @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2f4ed6dc969bee041215944a9b24328f")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3a096d6139d15dcab82f5b091d08489d")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec 
spec = new WalkerTestSpec(base, Arrays.asList("d8d6f2ebe79bca81c8a0911daa153b89")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a518c7436544f2b5f71c9d9427ce1cce")); executeTest("HCTestStructuralIndels: ", spec); } @@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("d01cb5f77ed5aca1d228cfbce9364c21")); + Arrays.asList("8a400b0c46f41447fcc35a907e34f384")); executeTest("HC calling on a ReducedRead BAM", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java index 5652b118d..e0b8cf466 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssemblerUnitTest.java @@ -10,16 +10,11 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.genotyper.ArtificialReadPileupTestProvider; import org.broadinstitute.sting.utils.Haplotype; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.jgrapht.graph.DefaultDirectedGraph; import org.testng.Assert; -import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; -import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.*; @@ -146,44 +141,6 @@ public class 
SimpleDeBruijnAssemblerUnitTest extends BaseTest { Assert.assertTrue(graphEquals(graph, expectedGraph)); } - @Test(enabled=false) -// not ready yet - public void testBasicGraphCreation() { - final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); - final byte refBase = refPileupTestProvider.getReferenceContext().getBase(); - final String altBase = (refBase==(byte)'A'?"C":"A"); - final int matches = 50; - final int mismatches = 50; - Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, altBase, new int[]{matches, mismatches}, false, 30); - PrintStream graphWriter = null; - - try{ - graphWriter = new PrintStream("du.txt"); - } catch (Exception e) {} - - - SimpleDeBruijnAssembler assembler = new SimpleDeBruijnAssembler(true,graphWriter); - final Haplotype refHaplotype = new Haplotype(refPileupTestProvider.getReferenceContext().getBases()); - refHaplotype.setIsReference(true); - assembler.createDeBruijnGraphs(refContext.get(refPileupTestProvider.getSampleNames().get(0)).getBasePileup().getReads(), refHaplotype); - -/* // clean up the graphs by pruning and merging - for( final DefaultDirectedGraph graph : graphs ) { - SimpleDeBruijnAssembler.pruneGraph( graph, PRUNE_FACTOR ); - //eliminateNonRefPaths( graph ); - SimpleDeBruijnAssembler.mergeNodes( graph ); - } - */ - if( graphWriter != null ) { - assembler.printGraphs(); - } - - int k=2; - - // find the best paths in the graphs - // return findBestPaths( refHaplotype, fullReferenceWithPadding, refLoc, activeAllelesToGenotype, activeRegion.getExtendedLoc() ); - - } @Test(enabled = true) public void testEliminateNonRefPaths() { DefaultDirectedGraph graph = new DefaultDirectedGraph(DeBruijnEdge.class); diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java index 6281054b1..4684d6d8a 100644 --- 
a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java @@ -29,7 +29,7 @@ package org.broadinstitute.sting.utils.pairhmm; // the imports for unit testing. import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.testng.Assert; import org.testng.annotations.DataProvider; diff --git a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java index c63f5615f..db21411b3 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/public/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.alignment; import net.sf.samtools.*; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java index b903b9f7d..ef50cf088 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import 
org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java index 2d568a96a..67305ce78 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/AlignerTestHarness.java @@ -4,7 +4,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.*; import org.broadinstitute.sting.alignment.Aligner; import org.broadinstitute.sting.alignment.Alignment; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java index fbeac9192..f29e639d6 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -6,7 +6,7 @@ import org.broadinstitute.sting.alignment.Alignment; import org.broadinstitute.sting.alignment.bwa.BWAAligner; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.reference.bwt.*; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java index e0c2ce72a..5a1c2a1b4 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.arguments; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * @author ebanks diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 547f375bb..f8f1831f6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java index 4c0257e6a..72b01df01 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.arguments; 
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * @author ebanks diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index 34627b973..88f4166ef 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.contexts; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index 73301c511..8f5f420fd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -23,17 +23,17 @@ */ package org.broadinstitute.sting.gatk.datasources.reads; +import org.broad.tribble.util.SeekableBufferedStream; +import org.broad.tribble.util.SeekableFileStream; + import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.CommandLineGATK; + import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.nio.channels.FileChannel; import 
java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -68,6 +68,9 @@ public class GATKBAMIndex { private final File mFile; + //TODO: figure out a good value for this buffer size + private final int BUFFERED_STREAM_BUFFER_SIZE=8192; + /** * Number of sequences stored in this index. */ @@ -78,8 +81,8 @@ public class GATKBAMIndex { */ private final long[] sequenceStartCache; - private FileInputStream fileStream; - private FileChannel fileChannel; + private SeekableFileStream fileStream; + private SeekableBufferedStream bufferedStream; public GATKBAMIndex(final File file) { mFile = file; @@ -277,7 +280,6 @@ public class GATKBAMIndex { for (int i = sequenceIndex; i < referenceSequence; i++) { sequenceStartCache[i] = position(); - // System.out.println("# Sequence TID: " + i); final int nBins = readInteger(); // System.out.println("# nBins: " + nBins); @@ -290,15 +292,18 @@ public class GATKBAMIndex { final int nLinearBins = readInteger(); // System.out.println("# nLinearBins: " + nLinearBins); skipBytes(8 * nLinearBins); + } sequenceStartCache[referenceSequence] = position(); } + + private void openIndexFile() { try { - fileStream = new FileInputStream(mFile); - fileChannel = fileStream.getChannel(); + fileStream = new SeekableFileStream(mFile); + bufferedStream = new SeekableBufferedStream(fileStream,BUFFERED_STREAM_BUFFER_SIZE); } catch (IOException exc) { throw new ReviewedStingException("Unable to open index file (" + exc.getMessage() +")" + mFile, exc); @@ -307,7 +312,7 @@ public class GATKBAMIndex { private void closeIndexFile() { try { - fileChannel.close(); + bufferedStream.close(); fileStream.close(); } catch (IOException exc) { @@ -352,7 +357,12 @@ public class GATKBAMIndex { private void read(final ByteBuffer buffer) { try { int bytesExpected = buffer.limit(); - int bytesRead = fileChannel.read(buffer); + //BufferedInputStream cannot read directly into a byte buffer, so we read into an array + //and put the result into the bytebuffer after 
the if statement. + + //SeekableBufferedStream is evil, it will "read" beyond the end of the file if you let it! + final int bytesToRead = (int) Math.min(bufferedStream.length() - bufferedStream.position(), bytesExpected); //min of int and long will definitely be castable to an int. + int bytesRead = bufferedStream.read(byteArray,0,bytesToRead); // We have a rigid expectation here to read in exactly the number of bytes we've limited // our buffer to -- if we read in fewer bytes than this, or encounter EOF (-1), the index @@ -363,6 +373,7 @@ public class GATKBAMIndex { "Please try re-indexing the corresponding BAM file.", mFile)); } + buffer.put(byteArray,0,bytesRead); } catch(IOException ex) { throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile); @@ -376,10 +387,13 @@ public class GATKBAMIndex { */ private ByteBuffer buffer = null; + //BufferedStream don't read into ByteBuffers, so we need this temporary array + private byte[] byteArray=null; private ByteBuffer getBuffer(final int size) { if(buffer == null || buffer.capacity() < size) { // Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging. buffer = ByteBuffer.allocate(size); + byteArray = new byte[size]; buffer.order(ByteOrder.LITTLE_ENDIAN); } buffer.clear(); @@ -389,7 +403,13 @@ public class GATKBAMIndex { private void skipBytes(final int count) { try { - fileChannel.position(fileChannel.position() + count); + + //try to skip forward the requested amount. 
+ long skipped = bufferedStream.skip(count); + + if( skipped != count ) { //if not managed to skip the requested amount + throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile); + } } catch(IOException ex) { throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile); @@ -398,7 +418,8 @@ public class GATKBAMIndex { private void seek(final long position) { try { - fileChannel.position(position); + //to seek a new position, move the fileChannel, and reposition the bufferedStream + bufferedStream.seek(position); } catch(IOException ex) { throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile); @@ -411,7 +432,7 @@ public class GATKBAMIndex { */ private long position() { try { - return fileChannel.position(); + return bufferedStream.position(); } catch (IOException exc) { throw new ReviewedStingException("Unable to read position from index file " + mFile, exc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index e99814278..5c932fdce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.baq.ReadTransformingIterator; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import java.io.File; @@ -252,9 +253,10 @@ public class SAMDataSource { if(readBufferSize != null) ReadShard.setReadBufferSize(readBufferSize); // TODO: use of non-final static variable here is just 
awful, especially for parallel tests else { - // Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively - // will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once. - ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000)); + // Choose a sensible default for the read buffer size. + // Previously we we're picked 100000 reads per BAM per shard with a max cap of 250K reads in memory at once. + // Now we are simply setting it to 100K reads + ReadShard.setReadBufferSize(100000); } resourcePool = new SAMResourcePool(Integer.MAX_VALUE); @@ -894,9 +896,11 @@ public class SAMDataSource { long lastTick = timer.currentTime(); for(final SAMReaderID readerID: readerIDs) { final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (removeProgramRecords) { init.reader.getFileHeader().setProgramRecords(new ArrayList()); } + if (threadAllocation.getNumIOThreads() > 0) { inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } @@ -916,6 +920,13 @@ public class SAMDataSource { for(SAMFileReader reader: readers.values()) headers.add(reader.getFileHeader()); headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true); + + // update all read groups to GATKSAMRecordReadGroups + final List gatkReadGroups = new LinkedList(); + for ( final SAMReadGroupRecord rg : headerMerger.getMergedHeader().getReadGroups() ) { + gatkReadGroups.add(new GATKSAMReadGroupRecord(rg)); + } + headerMerger.getMergedHeader().setReadGroups(gatkReadGroups); } final private void printReaderPerformance(final int nExecutedTotal, diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java index 5275c471e..b4161b06e 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java @@ -104,6 +104,56 @@ public class PerSampleDownsamplingReadsIterator implements StingSAMIterator { readComparator.compare(orderedDownsampledReadsCache.peek(), earliestPendingRead) <= 0; } + private boolean fillDownsampledReadsCache() { + SAMRecord prevRead = null; + int numPositionalChanges = 0; + + // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue + // can be released without violating global sort order + while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) { + SAMRecord read = nestedSAMIterator.next(); + String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; + + ReadsDownsampler thisSampleDownsampler = perSampleDownsamplers.get(sampleName); + if ( thisSampleDownsampler == null ) { + thisSampleDownsampler = downsamplerFactory.newInstance(); + perSampleDownsamplers.put(sampleName, thisSampleDownsampler); + } + + thisSampleDownsampler.submit(read); + processFinalizedAndPendingItems(thisSampleDownsampler); + + if ( prevRead != null && prevRead.getAlignmentStart() != read.getAlignmentStart() ) { + numPositionalChanges++; + } + + // Periodically inform all downsamplers of the current position in the read stream. This is + // to prevent downsamplers for samples with sparser reads than others from getting stuck too + // long in a pending state. + if ( numPositionalChanges > 0 && numPositionalChanges % DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL == 0 ) { + for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { + perSampleDownsampler.signalNoMoreReadsBefore(read); + processFinalizedAndPendingItems(perSampleDownsampler); + } + } + + prevRead = read; + } + + if ( ! 
nestedSAMIterator.hasNext() ) { + for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { + perSampleDownsampler.signalEndOfInput(); + if ( perSampleDownsampler.hasFinalizedItems() ) { + orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems()); + } + } + earliestPendingRead = null; + earliestPendingDownsampler = null; + } + + return readyToReleaseReads(); + } + private void updateEarliestPendingRead( ReadsDownsampler currentDownsampler ) { // If there is no recorded earliest pending read and this downsampler has pending items, // then this downsampler's first pending item becomes the new earliest pending read: @@ -135,57 +185,11 @@ public class PerSampleDownsamplingReadsIterator implements StingSAMIterator { } } - private boolean fillDownsampledReadsCache() { - SAMRecord prevRead = null; - int numPositionalChanges = 0; - - // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue - // can be released without violating global sort order - while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) { - SAMRecord read = nestedSAMIterator.next(); - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - - ReadsDownsampler thisSampleDownsampler = perSampleDownsamplers.get(sampleName); - if ( thisSampleDownsampler == null ) { - thisSampleDownsampler = downsamplerFactory.newInstance(); - perSampleDownsamplers.put(sampleName, thisSampleDownsampler); - } - - thisSampleDownsampler.submit(read); - updateEarliestPendingRead(thisSampleDownsampler); - - if ( prevRead != null && prevRead.getAlignmentStart() != read.getAlignmentStart() ) { - numPositionalChanges++; - } - - // Periodically inform all downsamplers of the current position in the read stream. This is - // to prevent downsamplers for samples with sparser reads than others from getting stuck too - // long in a pending state. 
- if ( numPositionalChanges > 0 && numPositionalChanges % DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL == 0 ) { - for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { - perSampleDownsampler.signalNoMoreReadsBefore(read); - updateEarliestPendingRead(perSampleDownsampler); - } - } - - prevRead = read; + private void processFinalizedAndPendingItems( ReadsDownsampler currentDownsampler ) { + if ( currentDownsampler.hasFinalizedItems() ) { + orderedDownsampledReadsCache.addAll(currentDownsampler.consumeFinalizedItems()); } - - if ( ! nestedSAMIterator.hasNext() ) { - for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { - perSampleDownsampler.signalEndOfInput(); - } - earliestPendingRead = null; - earliestPendingDownsampler = null; - } - - for ( ReadsDownsampler perSampleDownsampler : perSampleDownsamplers.values() ) { - if ( perSampleDownsampler.hasFinalizedItems() ) { - orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems()); - } - } - - return readyToReleaseReads(); + updateEarliestPendingRead(currentDownsampler); } public void remove() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java index 8ad91ac1c..a1f2a877b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; /** @@ -37,6 +38,6 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; public class Platform454Filter extends ReadFilter { public boolean filterOut(SAMRecord rec) { - return (ReadUtils.is454Read(rec)); + return (ReadUtils.is454Read((GATKSAMRecord)rec)); } } \ No newline at end of file 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java index 8e241bb2c..de5be94bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; /** @@ -41,7 +42,7 @@ public class PlatformFilter extends ReadFilter { public boolean filterOut(SAMRecord rec) { for ( String name : PLFilterNames ) - if ( ReadUtils.isPlatformRead(rec, name.toUpperCase() )) + if ( ReadUtils.isPlatformRead((GATKSAMRecord)rec, name.toUpperCase() )) return true; return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index 8e4633869..2a7c5c7b2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -30,14 +30,14 @@ import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.FeatureCodec; import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.variant.bcf2.BCF2Utils; +import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.writer.Options; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.Options; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.*; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index f521c959d..8a989b040 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.io.stubs; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index f92d78bb5..c512ba835 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -29,13 +29,13 @@ import org.broadinstitute.sting.gatk.CommandLineExecutable; import 
org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.utils.classloader.JVMUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.writer.Options; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.Options; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.io.OutputStream; @@ -232,7 +232,7 @@ public class VariantContextWriterStub implements Stub, Var } if ( UPDATE_CONTIG_HEADERS ) - vcfHeader = VCFUtils.withUpdatedContigs(vcfHeader, engine); + vcfHeader = GATKVCFUtils.withUpdatedContigs(vcfHeader, engine); } outputTracker.getStorage(this).writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index 51fed470f..2bc14aa69 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -131,7 +131,7 @@ public class GATKRunReport { private String hostName; @Element(required = true, name = "java") - private String java; + private String 
javaVersion; @Element(required = true, name = "machine") private String machine; @@ -212,7 +212,7 @@ public class GATKRunReport { hostName = Utils.resolveHostname(); // basic java information - java = Utils.join("-", Arrays.asList(System.getProperty("java.vendor"), System.getProperty("java.version"))); + javaVersion = Utils.join("-", Arrays.asList(System.getProperty("java.vendor"), System.getProperty("java.version"))); machine = Utils.join("-", Arrays.asList(System.getProperty("os.name"), System.getProperty("os.arch"))); // if there was an exception, capture it diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 5c7da82d0..8713e9797 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index a2fe94641..0e0e8017d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.GenomeLocParser; import 
org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.AbstractVCFCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.GATKDocUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index 81fe73075..2bb6cbeee 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -39,7 +39,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 605a6680f..e69924930 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -117,7 +117,7 @@ public class GATKReport { * @param numColumns the number of columns in this table */ public void addTable(final String tableName, final String tableDescription, final int numColumns) { - addTable(tableName, tableDescription, numColumns, false); + addTable(tableName, 
tableDescription, numColumns, false, false); } /** @@ -127,9 +127,10 @@ public class GATKReport { * @param tableDescription the description of the table * @param numColumns the number of columns in this table * @param sortByRowID whether to sort the rows by the row ID + * @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column */ - public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID) { - GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID); + public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) { + GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns); tables.put(tableName, table); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 3b4bdd087..2bf7c9609 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -47,6 +47,7 @@ public class GATKReportTable { private final String tableDescription; private final boolean sortByRowID; + private final boolean sortByAllColumns; private List underlyingData; private final List columnInfo; @@ -108,6 +109,9 @@ public class GATKReportTable { // when reading from a file, we do not re-sort the rows sortByRowID = false; + // when reading from a file, we do not re-sort the rows + sortByAllColumns = false; + // initialize the data final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]); final int nRows = Integer.parseInt(tableData[TableDataHeaderFields.ROWS.index()]); @@ -177,7 +181,7 @@ public class GATKReportTable { * @param numColumns the number of columns in 
this table */ public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) { - this(tableName, tableDescription, numColumns, true); + this(tableName, tableDescription, numColumns, true, false); } /** @@ -187,8 +191,9 @@ public class GATKReportTable { * @param tableDescription the description of the table * @param numColumns the number of columns in this table * @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added) + * @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added) */ - public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID) { + public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) { if ( !isValidName(tableName) ) { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. 
GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } @@ -200,6 +205,7 @@ public class GATKReportTable { this.tableName = tableName; this.tableDescription = tableDescription; this.sortByRowID = sortByRowID; + this.sortByAllColumns = sortByAllColumns; underlyingData = new ArrayList(INITITAL_ARRAY_SIZE); columnInfo = new ArrayList(numColumns); @@ -212,7 +218,7 @@ public class GATKReportTable { * @param tableToCopy */ public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) { - this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID); + this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns); for ( final GATKReportColumn column : tableToCopy.getColumnInfo() ) addColumn(column.getColumnName(), column.getFormat()); if ( copyData ) @@ -559,30 +565,62 @@ public class GATKReportTable { needsPadding = true; out.printf(column.getColumnFormat().getNameFormat(), column.getColumnName()); - } - out.println(); + } + out.println(); - // write the table body - if ( sortByRowID ) { - // make sure that there are exactly the correct number of ID mappings - if ( rowIdToIndex.size() != underlyingData.size() ) - throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently"); + // write the table body + if ( sortByAllColumns ) { + Collections.sort(underlyingData, new Comparator() { + //INVARIANT the two arrays are of the same length and corresponding elements are of the same type + @Override + public int compare(Object[] objectArr1, Object[] objectArr2) { + final int EQUAL = 0; - final TreeMap sortedMap; - try { - sortedMap = new TreeMap(rowIdToIndex); - } catch (ClassCastException e) { - throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID 
Objects are of different types"); - } - for ( final Map.Entry rowKey : sortedMap.entrySet() ) - writeRow(out, underlyingData.get(rowKey.getValue())); - } else { - for ( final Object[] row : underlyingData ) - writeRow(out, row); - } + int result = EQUAL; - out.println(); - } + int l = objectArr1.length; + for (int x = 0; x < l; x++) { + if (objectArr1[x] instanceof Integer) { + result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + if( result != EQUAL) { + return result; + } + } else if (objectArr1[x] instanceof Double) { + result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); + if( result != EQUAL) { + return result; + } + } else { // default uses String comparison + result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); + if( result != EQUAL) { + return result; + } + } + } + return result; + } + }); + for ( final Object[] row : underlyingData ) + writeRow(out, row); + } else if ( sortByRowID ) { + // make sure that there are exactly the correct number of ID mappings + if ( rowIdToIndex.size() != underlyingData.size() ) + throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently"); + + final TreeMap sortedMap; + try { + sortedMap = new TreeMap(rowIdToIndex); + } catch (ClassCastException e) { + throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types"); + } + for ( final Map.Entry rowKey : sortedMap.entrySet() ) + writeRow(out, underlyingData.get(rowKey.getValue())); + } else { + for ( final Object[] row : underlyingData ) + writeRow(out, row); + } + out.println(); + } private void writeRow(final PrintStream out, final Object[] row) { boolean needsPadding = false; @@ -697,8 +735,41 @@ public class GATKReportTable { } private List getOrderedRows() { - if ( !sortByRowID ) + if ( sortByAllColumns ) { + Collections.sort(underlyingData, new Comparator() { + //INVARIANT the two 
arrays are of the same length and corresponding elements are of the same type + @Override + public int compare(Object[] objectArr1, Object[] objectArr2) { + final int EQUAL = 0; + + int result = EQUAL; + + int l = objectArr1.length; + for (int x = 0; x < l; x++) { + if (objectArr1[x] instanceof Integer) { + result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]); + if( result != EQUAL) { + return result; + } + } else if (objectArr1[x] instanceof Double) { + result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]); + if( result != EQUAL) { + return result; + } + } else { // default uses String comparison + result = objectArr1[x].toString().compareTo(objectArr2[x].toString()); + if( result != EQUAL) { + return result; + } + } + } + return result; + } + }); return underlyingData; + } else if ( !sortByRowID ) { + return underlyingData; + } final TreeMap sortedMap; try { diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java index 3de85028f..91439b65f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.samples; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.Genotype; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java index 735f62ca3..ee71d82bb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java @@ -33,14 +33,14 @@ import 
org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction; +import org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction; import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction; import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import java.util.Iterator; import java.util.LinkedList; -import java.util.List; /** * A nano-scheduling version of TraverseReads. @@ -54,12 +54,20 @@ import java.util.List; */ public class TraverseReadsNano extends TraversalEngine,ReadShardDataProvider> { /** our log, which we want to capture anything from this class */ + private final static boolean PRE_READ_ALL_MAP_DATA = true; protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class); private static final boolean DEBUG = false; final NanoScheduler nanoScheduler; public TraverseReadsNano(int nThreads) { nanoScheduler = new NanoScheduler(nThreads); + nanoScheduler.setProgressFunction(new NSProgressFunction() { + @Override + public void progress(MapData lastProcessedMap) { + if ( lastProcessedMap.refContext != null ) + printProgress(lastProcessedMap.refContext.getLocus()); + } + }); } @Override @@ -78,7 +86,8 @@ public class TraverseReadsNano extends TraversalEngine, public T traverse(ReadWalker walker, ReadShardDataProvider dataProvider, T sum) { - logger.debug(String.format("TraverseReadsNano.traverse Covered dataset is %s", dataProvider)); + if ( logger.isDebugEnabled() ) + logger.debug(String.format("TraverseReadsNano.traverse Covered dataset is %s", dataProvider)); if( !dataProvider.hasReads() ) throw new IllegalArgumentException("Unable to traverse 
reads; no read data is available."); @@ -87,14 +96,10 @@ public class TraverseReadsNano extends TraversalEngine, final TraverseReadsMap myMap = new TraverseReadsMap(walker); final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker); - final List aggregatedInputs = aggregateMapData(dataProvider); - final T result = nanoScheduler.execute(aggregatedInputs.iterator(), myMap, sum, myReduce); - - final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read; - final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead); + final Iterator aggregatedInputs = aggregateMapData(dataProvider); + final T result = nanoScheduler.execute(aggregatedInputs, myMap, sum, myReduce); updateCumulativeMetrics(dataProvider.getShard()); - printProgress(locus); return result; } @@ -107,29 +112,49 @@ public class TraverseReadsNano extends TraversalEngine, * @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce * should execute */ - private List aggregateMapData(final ReadShardDataProvider dataProvider) { - final ReadView reads = new ReadView(dataProvider); - final ReadReferenceView reference = new ReadReferenceView(dataProvider); - final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider); - - final List mapData = new LinkedList(); - for ( final SAMRecord read : reads ) { - final ReferenceContext refContext = ! read.getReadUnmappedFlag() - ? reference.getReferenceContext(read) - : null; - - // if the read is mapped, create a metadata tracker - final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 - ? 
rodView.getReferenceOrderedDataForRead(read) - : null; - - // update the number of reads we've seen - dataProvider.getShard().getReadMetrics().incrementNumIterations(); - - mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker)); + private Iterator aggregateMapData(final ReadShardDataProvider dataProvider) { + final Iterator it = makeDataIterator(dataProvider); + if ( PRE_READ_ALL_MAP_DATA ) { + final LinkedList l = new LinkedList(); + while ( it.hasNext() ) l.add(it.next()); + return l.iterator(); + } else { + return it; } + } - return mapData; + + private Iterator makeDataIterator(final ReadShardDataProvider dataProvider) { + return new Iterator () { + final ReadView reads = new ReadView(dataProvider); + final ReadReferenceView reference = new ReadReferenceView(dataProvider); + final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider); + final Iterator readIterator = reads.iterator(); + + @Override public boolean hasNext() { return readIterator.hasNext(); } + + @Override + public MapData next() { + final SAMRecord read = readIterator.next(); + final ReferenceContext refContext = ! read.getReadUnmappedFlag() + ? reference.getReferenceContext(read) + : null; + + // if the read is mapped, create a metadata tracker + final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 + ? 
rodView.getReferenceOrderedDataForRead(read) + : null; + + // update the number of reads we've seen + dataProvider.getShard().getReadMetrics().incrementNumIterations(); + + return new MapData((GATKSAMRecord)read, refContext, tracker); + } + + @Override public void remove() { + throw new UnsupportedOperationException("Remove not supported"); + } + }; } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java index e63dbcabd..93eaee209 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReads.java @@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipping.ClippingOp; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 18bdb02ed..943a1147a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -32,12 +32,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypesContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 4d79c4112..4bda9a6d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -8,12 +8,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAn import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFFormatHeaderLine; +import 
org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index aef3e49cf..e19e82597 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -37,10 +37,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index e59fc827d..d234f3471 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -2,11 +2,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 0c78c0204..5b1a1e236 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -35,12 +35,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java index 1dff4d1a3..24da5e893 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index c9481f244..97c61f056 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ 
-8,13 +8,13 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 89a239e54..05c36eda6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -7,17 +7,17 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFFormatHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 52072d10c..f0d46cf35 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -34,13 +34,13 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import 
org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 07391c78c..5994c1ab7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -8,11 +8,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index ca7180510..5ca3c9885 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -33,20 +33,20 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.Serializable; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 0340f457c..6a879db12 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -9,11 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypesContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 037b357ae..ebeb10f43 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -7,9 +7,9 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import 
org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index dd058b469..826b45fa0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -10,11 +10,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypesContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java deleted file mode 100755 index c67d829c2..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.IndelUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.*; - -/** - * Rough category of indel type (insertion, deletion, multi-allelic, other) - */ -public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { - - public Map annotate(final RefMetaDataTracker tracker, - final AnnotatorCompatible walker, - final ReferenceContext ref, - final Map stratifiedContexts, - final VariantContext vc, - final Map stratifiedPerReadAlleleLikelihoodMap) { - - int run; - if (vc.isMixed()) { - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%s", "MIXED")); - return map; - - } - else if ( vc.isIndel() ) { - String type=""; - if (!vc.isBiallelic()) - type = "MULTIALLELIC_INDEL"; - else { - if (vc.isSimpleInsertion()) - type = "INS."; - else if (vc.isSimpleDeletion()) - type = "DEL."; - else - type = "OTHER."; - ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); - for 
(int k : inds) { - type = type+ IndelUtils.getIndelClassificationName(k)+"."; - } - } - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%s", type)); - return map; - - } else { - return null; - } - - } - - public List getKeyNames() { return Arrays.asList("IndelType"); } - - public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("IndelType", 1, VCFHeaderLineType.String, "Indel type description")); } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index c9a4d0ee6..cc42a7b35 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -6,10 +6,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index c9d5ca261..0bc9fa37e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -11,10 +11,10 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MendelianViolation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 2679a169b..96e76ceab 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -2,12 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; 
-import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 364bbdbb9..4b11a9698 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -8,13 +8,13 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index afb4ceb60..057fbaf8a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -31,14 +31,14 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFFormatHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 5f9f3416d..d0ea231a1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -7,11 +7,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 3e6aa62a2..1aaf3afcd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -6,11 +6,11 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index d75947879..af27d9c6f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -8,11 +8,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypesContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 474b6b150..883e2ddce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -10,12 +10,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index e7c0e6b14..adc661d66 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -10,14 +10,14 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import 
org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypesContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 334b89f01..de1fdf0d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -7,14 +7,14 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index 33e895187..090702e42 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -31,11 +31,11 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeaderLineCount; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index b3b0be153..a19b77aae 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -35,9 +35,10 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import 
org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; @@ -212,7 +213,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio // Make sure that the SnpEff version number and command-line header lines are present in the VCF header of // the SnpEff rod, and that the file was generated by a supported version of SnpEff: - VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); + VCFHeader snpEffVCFHeader = GATKVCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 8e1140af1..7c113c188 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -7,10 +7,10 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import 
org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index c72ba1c5f..1862ab6c6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -31,12 +31,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.VCFHeaderLineCount; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -57,7 +57,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa if ( !vc.isIndel()) return null; - Pair,byte[]> result = VariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases()); + Pair,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases()); if (result == null) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index 57b50c6e2..58cce6b30 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -8,12 +8,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index be7288a7e..40a442808 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -10,11 +10,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineCount; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 92060b4a3..09f01bd42 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -35,13 +35,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import 
org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.util.*; @@ -225,7 +226,7 @@ public class VariantAnnotator extends RodWalker implements Ann // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones Set hInfo = new HashSet(); hInfo.addAll(engine.getVCFAnnotationDescriptions()); - for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) { + for ( VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) { if ( isUniqueHeaderLine(line, hInfo) ) hInfo.add(line); } @@ -237,7 +238,7 @@ public class VariantAnnotator extends RodWalker implements Ann continue; } VCFInfoHeaderLine targetHeaderLine = null; - for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) { + for ( VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(expression.binding.getName())) ) { if ( line instanceof VCFInfoHeaderLine ) { VCFInfoHeaderLine infoline = (VCFInfoHeaderLine)line; if ( infoline.getID().equals(expression.fieldName) ) { diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 725097ddc..ddf5a6de1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -32,9 +32,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantType.java new file mode 100755 index 000000000..944962d30 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantType.java @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.IndelUtils; 
+import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.VariantContext; + +import java.util.*; + +/** + * Assigns a roughly correct category of the variant type (SNP, MNP, insertion, deletion, etc.) + */ +public class VariantType extends InfoFieldAnnotation implements ExperimentalAnnotation { + + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { + + StringBuffer type = new StringBuffer(""); + if ( vc.isVariant() && !vc.isBiallelic() ) + type.append("MULTIALLELIC_"); + + if ( !vc.isIndel() ) { + type.append(vc.getType().toString()); + } else { + if (vc.isSimpleInsertion()) + type.append("INSERTION."); + else if (vc.isSimpleDeletion()) + type.append("DELETION."); + else + type.append("COMPLEX."); + ArrayList inds = IndelUtils.findEventClassificationIndex(vc, ref); + type.append(IndelUtils.getIndelClassificationName(inds.get(0))); + + for (int i = 1; i < inds.size(); i++ ) { + type.append("."); + type.append(IndelUtils.getIndelClassificationName(inds.get(i))); + } + } + + Map map = new HashMap(); + map.put(getKeyNames().get(0), String.format("%s", type)); + return map; + } + + public List getKeyNames() { return Arrays.asList("VariantType"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("VariantType", 1, VCFHeaderLineType.String, "Variant type description")); } + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java index 03fcba760..632b60b02 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java index f610a2ba8..0d599332d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 6970908b5..490bbdbde 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -4,10 +4,10 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFFormatHeaderLine; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 5b2dc310d..ef717abd1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -4,8 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java index 996d85a67..0ec7ef907 
100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java index 83b10dd91..bdc129b8c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java @@ -35,10 +35,11 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.beagle.BeagleFeature; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.util.*; @@ -132,7 +133,7 @@ public class BeagleOutputToVCF extends RodWalker { // setup the header fields final Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit())); 
hInfo.add(new VCFFormatHeaderLine("OG",1, VCFHeaderLineType.String, "Original Genotype input to Beagle")); hInfo.add(new VCFInfoHeaderLine("R2", 1, VCFHeaderLineType.Float, "r2 Value reported by Beagle on each site")); hInfo.add(new VCFInfoHeaderLine("NumGenotypesChanged", 1, VCFHeaderLineType.Integer, "The number of genotypes changed by Beagle")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java index d11747766..e02b473b6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java @@ -38,14 +38,15 @@ import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibratio import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.VCFFilterHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import 
java.io.PrintStream; @@ -231,7 +232,7 @@ public class ProduceBeagleInput extends RodWalker { private final static double[] DIPLOID_FLAT_LOG10_LIKELIHOODS = MathUtils.toLog10(new double[]{ 0.33, 0.33, 0.33 }); public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) { - GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC); + GenomeLoc currentLoc = GATKVariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), preferredVC); StringBuffer beagleOut = new StringBuffer(); String marker = String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart()); @@ -344,7 +345,7 @@ public class ProduceBeagleInput extends RodWalker { // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); hInfo.add(new VCFFilterHeaderLine("bootstrap","This site used for genotype bootstrapping with ProduceBeagleInputWalker")); bootstrapVCFOutput.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames))); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java index a6a6d484e..fd552e12f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java @@ -36,16 +36,16 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.PrintStream; import java.util.Arrays; @@ -90,7 +90,7 @@ public class VariantsToBeagleUnphased extends RodWalker { throw new UserException.BadArgumentValue("bootstrap", "Bootstrap value must be fraction between 0 and 1"); if ( bootstrapVCFOutput != null ) { - Set hInfo = VCFUtils.getHeaderFields(getToolkit()); + Set hInfo = GATKVCFUtils.getHeaderFields(getToolkit()); bootstrapVCFOutput.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit()))); } } @@ -141,7 +141,7 @@ public class VariantsToBeagleUnphased extends RodWalker { } public void writeUnphasedBeagleOutput(VariantContext vc, boolean makeMissing) { - GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc); + GenomeLoc currentLoc = GATKVariantContextUtils.getLocation(getToolkit().getGenomeLocParser(), vc); StringBuffer beagleOut = new StringBuffer(); String marker = String.format("%s:%d ",currentLoc.getContig(), 
currentLoc.getStart()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 7ce98cf1d..7692c58e2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -38,14 +38,14 @@ import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.recalibration.*; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; @@ -53,7 +53,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.lang.reflect.Constructor; @@ -137,6 +136,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector private IndexedFastaSequenceFile referenceReader; // fasta reference reader for use with BAQ 
calculation + private final static byte NO_BAQ_UNCERTAINTY = (byte)'@'; /** * Parse the -cov arguments and create a list of covariates to be used here @@ -194,14 +194,7 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche recalibrationEngine.initialize(requestedCovariates, recalibrationTables); minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN; - - try { - // fasta reference reader for use with BAQ calculation - referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile); - } catch( FileNotFoundException e ) { - throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e); - } - + referenceReader = getToolkit().getReferenceDataSource().getReference(); } private RecalibrationEngine initializeRecalibrationEngine() { @@ -234,25 +227,48 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { // parse the solid color space and check for color no-calls return 0L; // skip this read completely } - read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); - final boolean[] skip = calculateSkipArray(read, metaDataTracker); // skip known sites of variation as well as low quality and non-regular bases final int[] isSNP = calculateIsSNP(read, ref, originalRead); final int[] isInsertion = calculateIsIndel(read, EventType.BASE_INSERTION); final int[] isDeletion = calculateIsIndel(read, EventType.BASE_DELETION); - final byte[] baqArray = calculateBAQArray(read); + final int nErrors = nEvents(isSNP, isInsertion, isDeletion); + + // note for efficiency regions we don't compute the BAQ array unless we actually have + // some error to marginalize over. For ILMN data ~85% of reads have no error + final byte[] baqArray = nErrors == 0 ? 
flatBAQArray(read) : calculateBAQArray(read); if( baqArray != null ) { // some reads just can't be BAQ'ed + final ReadCovariates covariates = RecalUtils.computeCovariates(read, requestedCovariates); + final boolean[] skip = calculateSkipArray(read, metaDataTracker); // skip known sites of variation as well as low quality and non-regular bases final double[] snpErrors = calculateFractionalErrorArray(isSNP, baqArray); final double[] insertionErrors = calculateFractionalErrorArray(isInsertion, baqArray); final double[] deletionErrors = calculateFractionalErrorArray(isDeletion, baqArray); - recalibrationEngine.updateDataForRead(read, skip, snpErrors, insertionErrors, deletionErrors); + + // aggregate all of the info into our info object, and update the data + final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skip, snpErrors, insertionErrors, deletionErrors); + recalibrationEngine.updateDataForRead(info); return 1L; } else { return 0L; } } + /** + * Compute the number of mutational events across all hasEvent vectors + * + * Simply the sum of entries in hasEvents + * + * @param hasEvents a vector of vectors of 0 (no event) and 1 (has event) + * @return the total number of events across all hasEvent arrays + */ + private int nEvents(final int[]... hasEvents) { + int n = 0; + for ( final int[] hasEvent : hasEvents ) { + n += MathUtils.sum(hasEvent); + } + return n; + } + protected boolean[] calculateSkipArray( final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker ) { final byte[] bases = read.getReadBases(); final boolean[] skip = new boolean[bases.length]; @@ -380,7 +396,6 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche throw new ReviewedStingException("Array length mismatch detected. 
Malformed read?"); } - final byte NO_BAQ_UNCERTAINTY = (byte)'@'; final int BLOCK_START_UNSET = -1; final double[] fractionalErrors = new double[baqArray.length]; @@ -424,6 +439,23 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche } } + /** + * Create a BAQ style array that indicates no alignment uncertainty + * @param read the read for which we want a BAQ array + * @return a BAQ-style non-null byte[] of length read.getReadLength() containing only NO_BAQ_UNCERTAINTY values + * // TODO -- could be optimized avoiding this function entirely by using this inline in the calculation code above + */ + private byte[] flatBAQArray(final GATKSAMRecord read) { + final byte[] baq = new byte[read.getReadLength()]; + Arrays.fill(baq, NO_BAQ_UNCERTAINTY); + return baq; + } + + /** + * Compute an actual BAQ array for read, based on its quals and the reference sequence + * @param read the read to BAQ + * @return the BAQ tag array for read; NOTE(review): the caller in map() null-checks this ("some reads just can't be BAQ'ed"), so it may not always be non-null -- confirm + */ + private byte[] calculateBAQArray( final GATKSAMRecord read ) { baq.baqRead(read, referenceReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG); return BAQ.getBAQTag(read); @@ -452,6 +484,8 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche @Override public void onTraversalDone(Long result) { + recalibrationEngine.finalizeData(); + logger.info("Calculating quantized quality scores..."); quantizeQualityScores(); @@ -487,6 +521,6 @@ public class BaseRecalibrator extends ReadWalker implements NanoSche } private void generateReport() { - RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates); + RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java new file mode 100644 index
000000000..121e3449b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ReadRecalibrationInfo.java @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.bqsr; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.recalibration.EventType; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +/** + * Created with IntelliJ IDEA. + * User: depristo + * Date: 12/18/12 + * Time: 3:50 PM + * + * TODO -- merge in ReadCovariates? 
+ */
+public final class ReadRecalibrationInfo {
+    private final GATKSAMRecord read;
+    private final int length;
+    private final ReadCovariates covariates;
+    private final boolean[] skips;
+    private final byte[] baseQuals, insertionQuals, deletionQuals;
+    private final double[] snpErrors, insertionErrors, deletionErrors;
+
+    /**
+     * Bundle a read with its covariates, skip mask and per-base error fractions
+     *
+     * Base, insertion and deletion qualities are read from the read itself.  The arrays passed in
+     * are stored by reference, not copied.
+     *
+     * @param read the read being recalibrated, cannot be null
+     * @param covariates the covariate key sets for read, cannot be null
+     * @param skips one flag per base, true where the base should be skipped, cannot be null
+     * @param snpErrors one error fraction per base for substitutions, cannot be null
+     * @param insertionErrors one error fraction per base for insertions.  null is tolerated here, but
+     *                        getErrorFraction for BASE_INSERTION dereferences it and will then fail
+     * @param deletionErrors one error fraction per base for deletions.  Same null caveat as insertionErrors
+     * @throws IllegalArgumentException if a required argument is null, or if any provided array's
+     *                                  length differs from the number of base qualities in read
+     */
+    public ReadRecalibrationInfo(final GATKSAMRecord read,
+                                 final ReadCovariates covariates,
+                                 final boolean[] skips,
+                                 final double[] snpErrors,
+                                 final double[] insertionErrors,
+                                 final double[] deletionErrors) {
+        if ( read == null ) throw new IllegalArgumentException("read cannot be null");
+        if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null");
+        if ( skips == null ) throw new IllegalArgumentException("skips cannot be null");
+        if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null");
+        // future: may allow insertionErrors && deletionErrors to be null, so don't enforce
+
+        this.read = read;
+        this.baseQuals = read.getBaseQualities();
+        this.length = baseQuals.length;
+        this.covariates = covariates;
+        this.skips = skips;
+        this.insertionQuals = read.getExistingBaseInsertionQualities();
+        this.deletionQuals = read.getExistingBaseDeletionQualities();
+        this.snpErrors = snpErrors;
+        this.insertionErrors = insertionErrors;
+        this.deletionErrors = deletionErrors;
+
+        // every per-base array must line up with the base qualities; each message reports the
+        // length of the array that actually failed the check
+        if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + skips.length + " != length " + length);
+        if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length);
+        if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + insertionErrors.length + " != length " + length);
+        if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + deletionErrors.length + " != length " + length);
+    }
+
+    /**
+     * Get the qual score for event type at offset
+     *
+     * @param eventType the type of event we want the qual for
+     * @param offset the offset into this read for the qual
+     * @return a valid quality score for event at offset
+     */
+    @Requires("validOffset(offset)")
+    @Ensures("validQual(result)")
+    public byte getQual(final EventType eventType, final int offset) {
+        switch ( eventType ) {
+            case BASE_SUBSTITUTION: return baseQuals[offset];
+            // note optimization here -- if we don't have ins/del quals we just return the default byte directly
+            case BASE_INSERTION: return insertionQuals == null ? GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL : insertionQuals[offset];
+            case BASE_DELETION: return deletionQuals == null ? GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL : deletionQuals[offset];
+            default: throw new IllegalStateException("Unknown event type " + eventType);
+        }
+    }
+
+    /**
+     * Get the error fraction for event type at offset
+     *
+     * The error fraction is a value between 0 and 1 that indicates how much certainty we have
+     * in the error occurring at offset.  A value of 1 means that the error definitely occurs at this
+     * site, a value of 0.0 means it definitely doesn't happen here.  0.5 means that half the weight
+     * of the error belongs here
+     *
+     * @param eventType the type of event we want the error fraction for
+     * @param offset the offset into this read for the error fraction
+     * @return a fractional weight for an error at this offset
+     */
+    @Requires("validOffset(offset)")
+    @Ensures("result >= 0.0 && result <= 1.0")
+    public double getErrorFraction(final EventType eventType, final int offset) {
+        switch ( eventType ) {
+            case BASE_SUBSTITUTION: return snpErrors[offset];
+            case BASE_INSERTION: return insertionErrors[offset];
+            case BASE_DELETION: return deletionErrors[offset];
+            default: throw new IllegalStateException("Unknown event type " + eventType);
+        }
+    }
+
+    /**
+     * Get the read involved in this recalibration info
+     * @return a non-null GATKSAMRecord
+     */
+    @Ensures("result != null")
+    public GATKSAMRecord getRead() {
+        return read;
+    }
+
+    /**
+     * Should offset in this read be skipped (because it's covered by a known variation site?)
+     * @param offset a valid offset into this info
+     * @return true if offset should be skipped, false otherwise
+     */
+    @Requires("validOffset(offset)")
+    public boolean skip(final int offset) {
+        return skips[offset];
+    }
+
+    /**
+     * Get the ReadCovariates object carrying the mapping from offsets -> covariate key sets
+     * @return a non-null ReadCovariates object
+     */
+    @Ensures("result != null")
+    public ReadCovariates getCovariatesValues() {
+        return covariates;
+    }
+
+    /**
+     * Ensures an offset is valid.  Used in contracts
+     * @param offset a proposed offset
+     * @return true if offset is valid w.r.t. the data in this object, false otherwise
+     */
+    private boolean validOffset(final int offset) {
+        return offset >= 0 && offset < baseQuals.length;
+    }
+
+    /**
+     * Ensures a quality score lies in [0, QualityUtils.MAX_QUAL_SCORE].  Used in contracts
+     * @param result a proposed quality score
+     * @return true if result is within that range, false otherwise
+     */
+    private boolean validQual(final byte result) {
+        return result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE;
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index c64482151..2f0f976fa 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -180,6 +180,11 @@ public class RecalibrationArgumentCollection {
 
     @Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
     public String BINARY_TAG_NAME = null;
+    /*
+     * whether GATK report tables should have rows in sorted order, starting from leftmost column
+     */
+    @Argument(fullName = "sort_by_all_columns", shortName = "sortAllCols", doc = "Sort the rows in the tables of reports", required = false)
+    public Boolean SORT_BY_ALL_COLUMNS = false;
 
     /////////////////////////////
     // Debugging-only Arguments
@@ -200,7 +205,12 @@ public class RecalibrationArgumentCollection {
     public File existingRecalibrationReport = null;
 
     public GATKReportTable generateReportTable(final String covariateNames) {
-        GATKReportTable argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
+        GATKReportTable argumentsTable;
+        if(SORT_BY_ALL_COLUMNS) {
+            argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true);
+        } else {
+            argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
+        }
         argumentsTable.addColumn("Argument");
         argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
         argumentsTable.addRowID("covariate", true);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
index 962d62d5e..5c002b7e5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java
@@ -1,8 +1,8 @@
 package org.broadinstitute.sting.gatk.walkers.bqsr;
 
-import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
+import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 /*
@@ -30,8 +30,31 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 public interface RecalibrationEngine {
-
+    /**
+     * Initialize the recalibration engine
+     *
+     * Called once before any calls to updateDataForRead are made.  The engine should prepare itself
+     * to handle any number of updateDataForRead calls, each carrying a ReadRecalibrationInfo whose
+     * key sets cover every one of the covariates provided here.
+     *
+     * The engine should collect match and mismatch data into recalibrationTables.
+     *
+     * @param covariates an array of the covariates we'll be using in this engine, order matters
+     * @param recalibrationTables the destination recalibrationTables where stats should be collected
+     */
     public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
 
-    public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
+    /**
+     * Update the recalibration statistics using the information in recalInfo
+     * @param recalInfo data structure holding the recalibration values for a single read, cannot be null
+     */
+    @Requires("recalInfo != null")
+    public void updateDataForRead(final ReadRecalibrationInfo recalInfo);
+
+    /**
+     * Finalize, if appropriate, all derived data in recalibrationTables.
+     *
+     * Called once after all calls to updateDataForRead have been issued.
+     */
+    public void finalizeData();
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
index 6031aa955..a6ab98e8b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java
@@ -25,48 +25,47 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
-import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
 import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
 import org.broadinstitute.sting.utils.recalibration.RecalDatum;
 import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
+import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
-
     protected Covariate[] covariates;
     protected RecalibrationTables recalibrationTables;
 
+    @Override
     public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
+        if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
+        if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null");
+
         this.covariates = covariates.clone();
         this.recalibrationTables = recalibrationTables;
     }
 
     @Override
-    public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
+    public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
+        final GATKSAMRecord read = recalInfo.getRead();
+        final EventType eventType = EventType.BASE_SUBSTITUTION; // this engine only tallies substitution events
+        final ReadCovariates readCovariates = recalInfo.getCovariatesValues(); // fetched once per read, not once per base
+
         for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
-            if( !skip[offset] ) {
-                final ReadCovariates readCovariates = covariateKeySetFrom(read);
+            if( ! recalInfo.skip(offset) ) {
+                final byte qual = recalInfo.getQual(eventType, offset);
+                final double isError = recalInfo.getErrorFraction(eventType, offset);
+                final int[] keys = readCovariates.getKeySet(offset, eventType);
 
-                final byte qual = read.getBaseQualities()[offset];
-                final double isError = snpErrors[offset];
-
-                final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
-                final int eventIndex = EventType.BASE_SUBSTITUTION.index;
-
-                combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
-
-                incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
+                incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index); // the read group table is no longer updated here; finalizeData() derives it
 
                 for (int i = 2; i < covariates.length; i++) {
                     if (keys[i] < 0)
                         continue;
 
-                    incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
+                    incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index);
                 }
             }
         }
@@ -84,13 +83,31 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
     }
 
     /**
-     * Get the covariate key set from a read
+     * Create derived recalibration data tables
      *
-     * @param read the read
-     * @return the covariate keysets for this read
+     * Assumes that all of the principal tables (by quality score) have been completely updated,
+     * and walks over that data to build the summary tables derived from it (here, the by-read-group table).
      */
-    protected ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
-        return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE);
+    @Override
+    public void finalizeData() {
+        final NestedIntegerArray byReadGroupTable = recalibrationTables.getReadGroupTable();
+        final NestedIntegerArray byQualTable = recalibrationTables.getQualityScoreTable();
+
+        // iterate over every populated entry of the qual table
+        for ( NestedIntegerArray.Leaf leaf : byQualTable.getAllLeaves() ) {
+            final int rgKey = leaf.keys[0];
+            final int eventIndex = leaf.keys[2]; // keys[1] is skipped; presumably the quality-score key -- TODO confirm
+            final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
+            final RecalDatum qualDatum = leaf.value;
+
+            if ( rgDatum == null ) {
+                // first leaf seen for this (read group, event): seed the byReadGroup table with a copy of qualDatum
+                byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
+            } else {
+                // fold the qual datum into the datum already in the byReadGroup table
+                rgDatum.combine(qualDatum); // NOTE(review): a second finalizeData() call would combine the same leaves again
+            }
+        }
     }
 
     /**
@@ -105,7 +122,10 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
      * @param isError error value for this event
      * @param keys    location in table of our item
      */
-    protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray table, final byte qual, final double isError, final int... keys ) {
+    protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray table,
+                                                   final byte qual,
+                                                   final double isError,
+                                                   final int... keys ) {
         final RecalDatum existingDatum = table.get(keys);
 
         if ( existingDatum == null ) {
@@ -121,34 +141,4 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
             existingDatum.increment(1.0, isError);
         }
     }
-
-    /**
-     * Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
-     * new item there if there isn't already one.
- * - * Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put() - * to return false if another thread inserts a new item at our position in the middle of our put operation. - * - * @param table the table that holds/will hold our item - * @param qual qual for this event - * @param isError error value for this event - * @param keys location in table of our item - */ - protected void combineDatumOrPutIfNecessary( final NestedIntegerArray table, final byte qual, final double isError, final int... keys ) { - final RecalDatum existingDatum = table.get(keys); - final RecalDatum newDatum = createDatumObject(qual, isError); - - if ( existingDatum == null ) { - // No existing item, try to put a new one - if ( ! table.put(newDatum, keys) ) { - // Failed to put a new item because another thread came along and put an item here first. - // Get the newly-put item and combine it with our item (item is guaranteed to exist at this point) - table.get(keys).combine(newDatum); - } - } - else { - // Easy case: already an item here, so combine it with our item - existingDatum.combine(newDatum); - } - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java index 48019efea..68084e606 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.variant.utils.BaseUtils; import java.io.File; import java.io.FileNotFoundException; @@ -305,7 +306,7 @@ public class CallableLoci extends LocusWalker { public void initialize() { report 
= new GATKReport(); - report.addTable(reportName, reportDescription, 6, true); + report.addTable(reportName, reportDescription, 6, true, false); table = report.getTable(reportName); table.addColumn("readgroup"); table.addColumn("cycle"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java index cbd3bc950..095807e5c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java index 0d8195551..9ebb34585 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/ThresHolder.java @@ -24,7 
+24,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.HashSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 5726eeed9..fe95112e7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -28,9 +28,9 @@ import org.apache.log4j.Logger; import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.FeatureReader; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.*; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.*; import java.util.Iterator; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java index 22c6097cf..ca605bdad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; 
+import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java index 6beade070..0f9b79930 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index 2c009f7f2..a130b8993 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; public class ClusteredSnps { private GenomeLocParser genomeLocParser; @@ -32,7 +32,7 @@ public class ClusteredSnps { throw new UserException.BadInput("The clustered SNPs filter does not work in the presence of non-variant records; see the documentation for more details"); // find 
the nth variant - GenomeLoc left = VariantContextUtils.getLocation(genomeLocParser,variants[i].getVariantContext()); + GenomeLoc left = GATKVariantContextUtils.getLocation(genomeLocParser, variants[i].getVariantContext()); GenomeLoc right = null; int snpsSeen = 1; @@ -40,7 +40,7 @@ public class ClusteredSnps { while ( ++currentIndex < variants.length ) { if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) { if ( ++snpsSeen == snpThreshold ) { - right = VariantContextUtils.getLocation(genomeLocParser,variants[currentIndex].getVariantContext()); + right = GATKVariantContextUtils.getLocation(genomeLocParser, variants[currentIndex].getVariantContext()); break; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java index ede19746a..676cef733 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; public class FiltrationContext { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java index 8b3886cfb..3e0bf07e4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java @@ -37,11 +37,12 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import 
org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.util.*; @@ -170,7 +171,7 @@ public class VariantFiltration extends RodWalker { // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); if ( clusterWindow > 0 ) hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index 7dcc95361..751418a61 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; @@ -283,7 +283,7 @@ public class ConsensusAlleleCounter { if (vcs.isEmpty()) return 
Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion - final VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false); + final VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false); return mergedVC.getAlleles(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java index 95b81e322..86047d30b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; public enum DiploidGenotype { AA ('A', 'A'), diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 2870b6629..290c33b2b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import 
org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index a8ee4afde..ea09e17d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -30,13 +30,13 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.List; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 0d9f443e2..56965c022 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -31,14 +31,14 @@ 
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 791cdc325..c10f7264e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; @@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import 
org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 5f6ddf0f1..e7add3213 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -28,8 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.utils.pairhmm.PairHMM; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { @@ -151,7 +151,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy */ @Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. 
For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false) - public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY; + public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY; @Hidden @Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 36be2e7c6..7b82403b5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -42,13 +42,13 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.PrintStream; import java.util.*; @@ -232,7 +232,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif 
// Check for protected modes if (GATKLiteUtils.isGATKLite()) { // no polyploid/pooled mode in GATK Like - if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY || + if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY || UAC.referenceSampleName != null || UAC.referenceSampleRod.isBound()) { throw new UserException.NotSupportedInGATKLite("you cannot enable usage of ploidy values other than 2"); @@ -303,7 +303,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site")); // add the pool values for each genotype - if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { + if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) { headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample")); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 8f2588679..1d5671857 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -40,11 +40,13 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.utils.*; import 
org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.PrintStream; import java.lang.reflect.Constructor; @@ -112,7 +114,7 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY); } @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"}) @@ -503,7 +505,7 @@ public class UnifiedGenotyperEngine { // if we are subsetting alleles (either because there were too many or because some were not polymorphic) // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). 
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync - vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); + vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall); if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations @@ -640,7 +642,7 @@ public class UnifiedGenotyperEngine { private void determineGLModelsToUse() { String modelPrefix = ""; - if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ) + if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY ) modelPrefix = GPSTRING; if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java index 423c80112..a30ae0d34 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * Created by IntelliJ IDEA. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java index f783267bc..0e6c0762c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalc.java @@ -29,9 +29,9 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.SimpleTimer; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.GenotypesContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java index 142469077..bd46c4b84 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java @@ -29,7 +29,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 
4895c84d9..0c2e59d08 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -26,7 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; @@ -84,7 +85,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { alleles.add(vc.getReference()); alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles())); builder.alleles(alleles); - builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); + builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); return builder.make(); } else { return vc; @@ -330,6 +331,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { final List allelesToUse, final boolean assignGenotypes, final int ploidy) { - return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes); + return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java index ab230d398..8e8a2d8ce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java @@ -26,10 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; 
-import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypesContext; import java.util.ArrayList; @@ -71,7 +71,7 @@ abstract class ExactAFCalc extends AFCalc { if ( sample.hasLikelihoods() ) { double[] gls = sample.getLikelihoods().getAsVector(); - if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL ) + if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) genotypeLikelihoods.add(gls); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java index b138ddf70..c9270a6a7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java @@ -4,7 +4,7 @@ import com.google.java.contract.Requires; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java index 937ef2ffc..76017be4f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java index 67cc79646..f37e4e3d2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java @@ -2,8 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java index ad6361a3f..179b5d8ba 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import com.google.java.contract.Ensures; import 
com.google.java.contract.Requires; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 3a10620aa..3c1bc338a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -30,8 +30,9 @@ import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; @@ -421,7 +422,7 @@ public class HaplotypeIndelErrorModel { double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()]; double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; int i=0; - for (SAMRecord read : pileup.getReads()) { + for (GATKSAMRecord read : pileup.getReads()) { if(ReadUtils.is454Read(read)) { continue; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 345f79b2b..15d3f43fd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -54,7 +54,8 @@ import 
org.broadinstitute.sting.utils.sam.NWaySAMFileWriter; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; @@ -528,7 +529,7 @@ public class IndelRealigner extends ReadWalker { sawReadInCurrentInterval = false; } - private boolean doNotTryToClean(SAMRecord read) { + private boolean doNotTryToClean(GATKSAMRecord read) { return read.getReadUnmappedFlag() || read.getNotPrimaryAlignmentFlag() || read.getReadFailsVendorQualityCheckFlag() || @@ -834,7 +835,7 @@ public class IndelRealigner extends ReadWalker { // TODO -- get rid of this try block when Picard does the right thing for reads aligned off the end of the reference try { if ( read.getAttribute(SAMTag.NM.name()) != null ) - read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex-1)); + read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex - 1)); if ( read.getAttribute(SAMTag.UQ.name()) != null ) read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1)); } catch (Exception e) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 848aaf8a3..15ced4f0b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import 
org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.io.PrintStream; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index b14dc9cc9..ad97e3ddc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.PrintStream; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index 0165c6cf3..68be1629c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -53,7 +53,7 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature; import org.broadinstitute.sting.utils.codecs.refseq.Transcript; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.variant.vcf.*; import 
org.broadinstitute.sting.utils.collections.CircularArray; import org.broadinstitute.sting.utils.collections.PrimitivePair; import org.broadinstitute.sting.utils.exceptions.StingException; @@ -64,8 +64,8 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.*; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java index c629bd313..985d0a9c9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AllelePair.java @@ -24,8 +24,8 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java index 54838b55e..e131ea5e3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/BaseArray.java @@ -23,7 +23,7 @@ */ package 
org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import java.util.Arrays; import java.util.LinkedList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index 5bbc6dacc..5515f2141 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -28,12 +28,13 @@ import net.sf.picard.reference.ReferenceSequenceFile; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.File; import java.io.FileNotFoundException; @@ -124,7 +125,7 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter return; } - logger.debug("Next VC input = " + VariantContextUtils.getLocation(genomeLocParser, vc)); + logger.debug("Next VC input = " + GATKVariantContextUtils.getLocation(genomeLocParser, vc)); boolean curVcIsNotFiltered = vc.isNotFiltered(); if 
(vcfrWaitingToMerge == null) { @@ -134,20 +135,20 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!"); if (curVcIsNotFiltered) { // still need to wait before can release vc - logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser, vc)); + logger.debug("Waiting for new variant " + GATKVariantContextUtils.getLocation(genomeLocParser, vc)); vcfrWaitingToMerge = new VCFRecord(vc, false); } else if (!emitOnlyMergedRecords) { // filtered records are never merged - logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser, vc)); + logger.debug("DIRECTLY output " + GATKVariantContextUtils.getLocation(genomeLocParser, vc)); innerWriter.add(vc); } } else { // waiting to merge vcfrWaitingToMerge - logger.debug("Waiting to merge " + VariantContextUtils.getLocation(genomeLocParser, vcfrWaitingToMerge.vc)); + logger.debug("Waiting to merge " + GATKVariantContextUtils.getLocation(genomeLocParser, vcfrWaitingToMerge.vc)); if (!curVcIsNotFiltered) { if (!emitOnlyMergedRecords) { // filtered records are never merged - logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser, vc)); + logger.debug("Caching unprocessed output " + GATKVariantContextUtils.getLocation(genomeLocParser, vc)); filteredVcfrList.add(new VCFRecord(vc, false)); } } @@ -345,10 +346,10 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter if (!PhasingUtils.alleleSegregationIsKnown(gt1, gt2)) { aas.segregationUnknown++; - logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser, vc2)); + logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + GATKVariantContextUtils.getLocation(genomeLocParser, vc1) + ", " 
+ GATKVariantContextUtils.getLocation(genomeLocParser, vc2)); } else if (gt1.isHomRef() || gt2.isHomRef()) { - logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser, vc2)); + logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + GATKVariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + GATKVariantContextUtils.getLocation(genomeLocParser, vc2)); aas.eitherNotVariant++; } else { // BOTH gt1 and gt2 have at least one variant allele (so either hets, or homozygous variant): @@ -377,7 +378,7 @@ class MergeSegregatingAlternateAllelesVCFWriter implements VariantContextWriter // Check MNPs vs. CHets: if (containsRefAllele(site1Alleles) && containsRefAllele(site2Alleles)) { - logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser, vc2)); + logger.debug("HET-HET for " + samp + " at " + GATKVariantContextUtils.getLocation(genomeLocParser, vc1) + ", " + GATKVariantContextUtils.getLocation(genomeLocParser, vc2)); if (logger.isDebugEnabled() && !(gt1.isHet() && gt2.isHet())) throw new ReviewedStingException("Since !gt1.isHomRef() && !gt2.isHomRef(), yet both have ref alleles, they BOTH must be hets!"); @@ -463,7 +464,7 @@ class DistanceMergeRule extends VariantContextMergeRule { } public int minDistance(VariantContext vc1, VariantContext vc2) { - return VariantContextUtils.getLocation(genomeLocParser, vc1).minDistance(VariantContextUtils.getLocation(genomeLocParser, vc2)); + return GATKVariantContextUtils.getLocation(genomeLocParser, vc1).minDistance(GATKVariantContextUtils.getLocation(genomeLocParser, vc2)); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 7ebfec49e..2b53dea43 
100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -12,11 +12,12 @@ import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.PrintStream; import java.util.*; @@ -396,7 +397,7 @@ public class PhaseByTransmission extends RodWalker, HashMa public void initialize() { ArrayList rodNames = new ArrayList(); rodNames.add(variantCollection.variants.getName()); - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); //Get the trios from the families passed as ped @@ -406,7 +407,7 @@ public class PhaseByTransmission extends RodWalker, HashMa Set headerLines = new HashSet(); - headerLines.addAll(VCFUtils.getHeaderFields(this.getToolkit())); + headerLines.addAll(GATKVCFUtils.getHeaderFields(this.getToolkit())); headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct")); headerLines.add(new 
VCFHeaderLine("source", SOURCE_NAME)); vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples)); @@ -811,9 +812,9 @@ public class PhaseByTransmission extends RodWalker, HashMa updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters); mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), - phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()), - phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), - phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString()); + phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()), + phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),printAD(phasedFather.getAD()),phasedFather.getLikelihoodsString(), + phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString()); if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType())) metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); } @@ -823,8 +824,8 @@ public class PhaseByTransmission extends RodWalker, HashMa metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s", vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), - 
phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(), - phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); + phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()),phasedMother.getLikelihoodsString(), + phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString()); } } else{ @@ -834,8 +835,8 @@ public class PhaseByTransmission extends RodWalker, HashMa metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), - phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), - phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); + phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),printAD(phasedFather.getAD()),phasedFather.getLikelihoodsString(), + phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString()); } //Report violation if set so @@ -850,6 +851,18 @@ public class PhaseByTransmission extends RodWalker, HashMa return metricsCounters; } + private static String printAD(final int[] AD) { + if ( AD == null || AD.length == 0 ) + return "."; + final StringBuilder sb = new StringBuilder(); + sb.append(AD[0]); + for ( int i = 1; i < AD.length; i++) { + 
sb.append(","); + sb.append(AD[i]); + } + return sb.toString(); + } + /** * Initializes the reporting counters. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java index a95b13d68..4d764866d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java @@ -23,7 +23,7 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java index 630d99ce9..5d819ccfd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java @@ -29,9 +29,10 @@ import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.util.*; @@ -161,8 +162,8 @@ class PhasingUtils { } static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) { - GenomeLoc loc1 = 
VariantContextUtils.getLocation(genomeLocParser, vc1); - GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser, vc2); + GenomeLoc loc1 = GATKVariantContextUtils.getLocation(genomeLocParser, vc1); + GenomeLoc loc2 = GATKVariantContextUtils.getLocation(genomeLocParser, vc2); if (!loc1.onSameContig(loc2)) throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java index 7d848d0d4..68eab9889 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java @@ -34,24 +34,26 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.variantcontext.*; -import 
org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.*; import java.util.*; -import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods; +import static org.broadinstitute.sting.utils.variant.GATKVCFUtils.getVCFHeadersFromRods; /** * Walks along all variant ROD loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using upstream and downstream reads). @@ -212,7 +214,7 @@ public class ReadBackedPhasing extends RodWalker hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit())); hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); // Phasing-specific INFO fields: @@ -267,13 +269,13 @@ public class ReadBackedPhasing extends RodWalker header = VCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName()); + Map header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName()); samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); Set headerLines = VCFUtils.smartMergeHeaders(header.values(), logger); headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 9d96dedef..aaab1af83 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -16,11 +16,11 @@ import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/FrequencyModeSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/FrequencyModeSelector.java index 62305d3c0..6bd0cbc4b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/FrequencyModeSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/FrequencyModeSelector.java @@ -24,12 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; -import java.util.HashMap; public abstract class FrequencyModeSelector implements Cloneable{ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java index d71d0c9c8..6d7f2b672 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GLBasedSampleSelector.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalc; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.TreeSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java index de832b108..c69325d00 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GTBasedSampleSelector.java @@ -24,12 +24,8 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; -import java.util.Map; -import java.util.Set; import java.util.TreeSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java index 67ddc47ff..c4c9b0e9d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/GenomeEvent.java @@ -27,9 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import java.util.HashMap; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java index 7c1d63f02..a259544ca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/KeepAFSpectrumFrequencySelector.java @@ -26,9 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/NullSampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/NullSampleSelector.java index a48bcb8a1..0a3ae538a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/NullSampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/NullSampleSelector.java @@ -24,7 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.TreeSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/SampleSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/SampleSelector.java index afbff93d0..2632716a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/SampleSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/SampleSelector.java @@ -23,7 +23,7 @@ */ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.TreeSet; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java index 4019c5631..7d03e8edb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/UniformSamplingFrequencySelector.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.validation.validationsiteselector; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.ArrayList; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java index 9e5fd25a9..8a3efb50d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java @@ -31,13 +31,13 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import java.util.*; @@ -203,7 +203,7 @@ public class ValidationSiteSelector extends RodWalker { public void initialize() { // Get list of samples to include in the output - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit()); + Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); Collection samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index 201028d99..dc0f8f3fe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -23,15 +23,16 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc import org.broadinstitute.sting.gatk.walkers.varianteval.util.VariantEvalUtils; import org.broadinstitute.sting.utils.GenomeLoc; 
import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; @@ -172,7 +173,7 @@ public class VariantEval extends RodWalker implements TreeRedu protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; @Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false) - protected int ploidy = VariantContextUtils.DEFAULT_PLOIDY; + protected int ploidy = GATKVariantContextUtils.DEFAULT_PLOIDY; @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) private File ancestralAlignmentsFile = null; @@ -259,7 +260,7 @@ public class VariantEval extends RodWalker implements TreeRedu } // Now that we have all the rods categorized, determine the sample list from the eval rods. 
- Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), evals); + Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); // Load the sample list, using an intermediate tree set to sort the samples diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java index 97814075c..6af70811f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -162,7 +162,7 @@ public class VariantEvalReportWriter { // create the table final String tableName = ve.getSimpleName(); final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description(); - report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true); + report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 
2 : datamap.size()), true, false); // grab the table, and add the columns we need to it final GATKReportTable table = report.getTable(tableName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index c14754715..be1e300e2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -5,8 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * The Broad Institute diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index c7392cff0..3d5d5415c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -6,8 +6,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; @Analysis(description = "Counts different classes of variants in the sample") public class CountVariants extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java index 15f791e9d..e51068431 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -7,9 +7,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeType; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeType; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java index 0b17c7adb..2cfade223 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -30,8 +30,8 @@ import 
org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java index dda7e8611..fc2e205c6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java @@ -32,9 +32,9 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; @Analysis(description = "Evaluation summary for indels") public class IndelSummary extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 97a8b4dda..755fd1b03 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Map; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index 09c8687eb..f0b4dd90d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -31,9 +31,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; 
@Analysis(description = "Evaluation summary for multi-allelic variants") public class MultiallelicSummary extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java index a0cb662e0..b1ec79586 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java @@ -29,7 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; @Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites") public class PrintMissingComp extends VariantEvaluator { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index b87a8ee85..091eed217 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -5,9 +5,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import 
org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index fe2437976..f761ce973 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -5,9 +5,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; @Analysis(description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index a2bcdaf1d..323e67c75 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -5,10 +5,10 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Collection; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index c08ff379b..162ca5df1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; public abstract class VariantEvaluator implements Comparable { private VariantEval walker; diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index 71ea6af98..151fb8fa5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -34,11 +34,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/GenotypePhasingEvaluator.java index 500ab8e65..9845318a0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/GenotypePhasingEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/GenotypePhasingEvaluator.java @@ -13,9 +13,9 @@ //import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext; 
//import org.broadinstitute.sting.utils.GenomeLoc; //import org.broadinstitute.sting.utils.MathUtils; -//import org.broadinstitute.sting.utils.variantcontext.Genotype; -//import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -//import org.broadinstitute.sting.utils.variantcontext.VariantContext; +//import org.broadinstitute.variant.variantcontext.Genotype; +//import org.broadinstitute.variant.variantcontext.GenotypesContext; +//import org.broadinstitute.variant.variantcontext.VariantContext; // //import java.util.HashMap; //import java.util.HashSet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 7197fc14c..51552d552 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -4,10 +4,10 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java index 817663026..f35b33c47 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java @@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java index 1274028d7..425aca43c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java @@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java index 
328bab1db..b1b8d53a1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java @@ -2,9 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java index 7536b0237..97aa45773 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java index eab59864f..ceefb8e43 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java @@ -2,7 +2,7 @@ package 
org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java index 21255f7b3..d4646a3ce 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java @@ -24,11 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.List; -import java.util.Map; -import java.util.Set; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * Tag this stratification as dynamically determining the final strat based on the input data diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java index 6328d6a51..5aa47a0a5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java @@ -3,9 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; 
-import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java index 278ced713..72e3e2838 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 330451fff..2ebe49228 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.List; 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java index 089351eaa..6bb738f7e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java @@ -2,9 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.ArrayList; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java index 4fc381b3f..bbb9ef9f4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java index c89c4be66..d152b086e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.List; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 2ad08d806..61bc3f0b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java index 65633bc2b..7ffc3f56b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java @@ -26,10 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; -import java.util.Collections; import java.util.List; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java index 621f4337f..62f5282c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java index 834c02b83..aaa8e685a 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java @@ -24,11 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; -import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.List; @@ -51,7 +50,7 @@ public class TandemRepeat extends VariantStratifier { public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if ( eval == null || ! 
eval.isIndel() ) return ALL; - else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) { + else if ( GATKVariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) { print("REPEAT", eval, ref); return REPEAT; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index 90b6230ca..ffaa00b4a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.Stratifier; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java index 09b89386f..081d09ded 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java @@ -26,9 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; 
+import org.broadinstitute.variant.variantcontext.VariantContext; -import java.util.Arrays; import java.util.Collections; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java index c98c05a9a..07eb3ee7a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java @@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEval; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Set; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java index 38f7a7f40..737c49e43 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util; import org.apache.commons.jexl2.Expression; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; public class SortableJexlVCMatchExp extends VariantContextUtils.JexlVCMatchExp implements Comparable { /** diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index e84b0b10e..3d22b7665 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -35,12 +35,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Require import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.StandardStratification; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 158d1e78a..dbe6ce9e1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -38,12 +38,13 @@ import 
org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import java.util.*; @@ -150,7 +151,7 @@ public class ApplyRecalibration extends RodWalker implements T // setup the header fields final Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); addVQSRStandardHeaderLines(hInfo); final TreeSet samples = new TreeSet(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java index 5f688d001..a177ce904 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; import 
org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * Created by IntelliJ IDEA. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java index 04ba3ff14..361677e37 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java @@ -29,7 +29,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index f18db412f..016ce7372 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -30,15 +30,15 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import 
org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.help.HelpUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index c670ad2fd..bdbcb8399 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -38,15 +38,15 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import 
org.broadinstitute.sting.utils.io.Resource; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import java.io.FileNotFoundException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 68fac7631..eb671ff4a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -37,14 +37,15 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.Options; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.VariantContext; +import 
org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.writer.Options; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.util.*; @@ -192,7 +193,7 @@ public class CombineVariants extends RodWalker implements Tree private Set samples; public void initialize() { - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit()); + Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); if ( vcfWriter instanceof VariantContextWriterStub) { sitesOnlyVCF = ((VariantContextWriterStub)vcfWriter).getWriterOptions().contains(Options.DO_NOT_WRITE_GENOTYPES); @@ -289,13 +290,13 @@ public class CombineVariants extends RodWalker implements Tree // iterate over the types so that it's deterministic for (VariantContext.Type type : VariantContext.Type.values()) { if (VCsByType.containsKey(type)) - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + mergedVCs.add(VariantContextUtils.simpleMerge(VCsByType.get(type), priority, rodNames.size() , filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } } else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) { - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs, + mergedVCs.add(VariantContextUtils.simpleMerge(vcs, priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 92d6e686b..055e73b5a 100755 --- 
a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -33,12 +33,12 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; @@ -62,7 +62,7 @@ public class FilterLiftedVariants extends RodWalker { public void initialize() { String trackName = variantCollection.variants.getName(); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); + Map vcfHeaders = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? 
vcfHeaders.get(trackName).getMetaDataInSortedOrder() : Collections.emptySet(), samples); writer.writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 9fe499a03..fbb81fda0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -39,14 +39,14 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.util.*; @@ -93,7 +93,7 @@ public class LeftAlignVariants extends RodWalker { public void initialize() { String trackName = variantCollection.variants.getName(); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); - Map vcfHeaders = 
VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); + Map vcfHeaders = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); Set headerLines = vcfHeaders.get(trackName).getMetaDataInSortedOrder(); baseWriter.writeHeader(new VCFHeader(headerLines, samples)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 63209e98c..0afa4e46e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -38,14 +38,15 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import 
org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.util.*; @@ -94,7 +95,7 @@ public class LiftoverVariants extends RodWalker { String trackName = variantCollection.variants.getName(); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); + Map vcfHeaders = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); Set metaData = new HashSet(); if ( vcfHeaders.containsKey(trackName) ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 2ab5136a7..2d8f9df8f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -35,14 +35,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import 
org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.util.*; @@ -78,7 +78,7 @@ public class RandomlySplitVariants extends RodWalker { final List inputNames = Arrays.asList(variantCollection.variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames); Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); vcfWriter1.writeHeader(new VCFHeader(hInfo, samples)); vcfWriter2 = VariantContextWriterFactory.create(file2, getMasterSequenceDictionary()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java index 46a3a8cd1..f9453e7c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java @@ -35,12 +35,13 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.text.ListFileUtils; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.io.File; import java.util.*; @@ -148,7 +149,7 @@ public class SelectHeaders extends RodWalker implements TreeRe // Get list of samples to include in the output List rodNames = Arrays.asList(variantCollection.variants.getName()); - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); headerLines.add(new VCFHeaderLine(VCFHeader.SOURCE_KEY, "SelectHeaders")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 9253446c8..cffd405b1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -41,12 +41,14 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.variantcontext.*; -import 
org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import java.io.FileNotFoundException; @@ -349,7 +351,7 @@ public class SelectVariants extends RodWalker implements TreeR // Get list of samples to include in the output List rodNames = Arrays.asList(variantCollection.variants.getName()); - vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); + vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); Collection samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); @@ -451,7 +453,7 @@ public class SelectVariants extends RodWalker implements TreeR UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; - UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 3e6ab050a..8e42f803b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -34,12 +34,14 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import 
org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -149,7 +151,7 @@ public class ValidateVariants extends RodWalker { if ( tracker.hasValues(dbsnp.dbsnp) ) { rsIDs = new HashSet(); for ( VariantContext rsID : tracker.getValues(dbsnp.dbsnp, ref.getLocus()) ) - rsIDs.add(rsID.getID()); + rsIDs.addAll(Arrays.asList(rsID.getID().split(VCFConstants.ID_FIELD_SEPARATOR))); } try { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 9236247f1..dfc9a8eea 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -34,13 +34,14 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import 
org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.*; @@ -164,7 +165,7 @@ public class VariantValidationAssessor extends RodWalker // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); // set up the info and filter headers hInfo.add(new VCFInfoHeaderLine("NoCallPct", 1, VCFHeaderLineType.Float, "Percent of no-calls")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index 4777b807f..98fe6636c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.alignment.bwa.java.AlignmentMatchSequence; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; @@ -10,18 +9,16 @@ import 
org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.*; import java.util.*; @@ -117,7 +114,7 @@ public class VariantsToBinaryPed extends RodWalker { // family ID, individual ID, Paternal ID, Maternal ID, Sex, Phenotype int dummyID = 0; // increments for dummy parental and family IDs used // want to be especially careful to maintain order here - Map headers = VCFUtils.getVCFHeadersFromRods(getToolkit()); + Map headers = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); for ( Map.Entry header : headers.entrySet() ) { if ( ! header.getKey().equals(variantCollection.variants.getName()) && ! 
metaDataFile.getAbsolutePath().endsWith(".fam") ) { continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index dd5264a1b..ff65197b1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -27,19 +27,19 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.io.PrintStream; import java.lang.reflect.Array; @@ -178,7 +178,7 @@ public class VariantsToTable extends 
RodWalker { public void initialize() { if ( !genotypeFieldsToTake.isEmpty() ) { - Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), variants); + Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants); TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); samples.addAll(vcfSamples); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 059e9c5fb..7c44966d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -39,16 +39,17 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import 
org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import java.io.File; import java.util.*; @@ -212,7 +213,7 @@ public class VariantsToVCF extends RodWalker { // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY)); allowedGenotypeFormatStrings.add(VCFConstants.GENOTYPE_KEY); diff --git a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java index 30b83abc2..688825cbc 100644 --- a/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java +++ b/public/java/src/org/broadinstitute/sting/jna/lsf/v7_0_6/LibLsf.java @@ -1399,7 +1399,10 @@ public class LibLsf { public static native int ls_isPartialLicensingEnabled(); - public static native lsfLicUsage.ByReference ls_getLicenseUsage(); + /* NOTE: ls_getLicenseUsage() is not supported by LSF v8.x + * Wei Xing, ICR + */ +// public static native lsfLicUsage.ByReference ls_getLicenseUsage(); public static native lsInfo.ByReference ls_info(); diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index 2226c6458..597a916f1 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -29,7 +29,7 @@ import net.sf.samtools.SAMFileWriter; import org.broad.tribble.Tribble; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; -import 
org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import java.lang.annotation.Annotation; diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java index 108a1c423..54eb67d8c 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java @@ -29,7 +29,7 @@ import net.sf.samtools.SAMFileWriter; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; import java.io.InputStream; diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index bf60b4a80..dbffacfbc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -34,10 +34,10 @@ import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broad.tribble.Feature; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * Factory class for creating GenomeLocs diff --git 
a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 4c708f2bf..2476a666e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -24,15 +24,14 @@ package org.broadinstitute.sting.utils; -import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.Cigar; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.Serializable; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java index c6ca39f4b..8601f08b4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/IndelUtils.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.ArrayList; import java.util.Arrays; @@ -122,9 +122,9 @@ public class IndelUtils { ArrayList inds = new ArrayList(); if ( vc.isSimpleInsertion() ) { - indelAlleleString = vc.getAlternateAllele(0).getDisplayString(); + indelAlleleString = vc.getAlternateAllele(0).getDisplayString().substring(1); } else if ( vc.isSimpleDeletion() ) 
{ - indelAlleleString = vc.getReference().getDisplayString(); + indelAlleleString = vc.getReference().getDisplayString().substring(1); } else { inds.add(IND_FOR_OTHER_EVENT); diff --git a/public/java/src/org/broadinstitute/sting/utils/LRUCache.java b/public/java/src/org/broadinstitute/sting/utils/LRUCache.java new file mode 100644 index 000000000..a3514c95f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/LRUCache.java @@ -0,0 +1,20 @@ +package org.broadinstitute.sting.utils; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * An LRU cache implemented as an extension to LinkedHashMap + */ +public class LRUCache extends LinkedHashMap { + private int capacity; // Maximum number of items in the cache. + + public LRUCache(int capacity) { + super(capacity+1, 1.0f, true); // Pass 'true' for accessOrder. + this.capacity = capacity; + } + + protected boolean removeEldestEntry(final Map.Entry entry) { + return (size() > this.capacity); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index a605a5596..ed55d2f97 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.samples.Sample; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypeType; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeType; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java index 
504704e55..847d8067c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java +++ b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java @@ -24,8 +24,7 @@ package org.broadinstitute.sting.utils; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * A canonical, master list of the standard NGS platforms. These values @@ -64,25 +63,15 @@ public enum NGSPlatform { } /** - * Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord. - * Note you should not use this function if you have a GATKSAMRecord -- use the - * accessor method instead. + * Convenience get -- get the NGSPlatform from a GATKSAMRecord. * - * @param read + * Just gets the platform from the GATKReadGroupRecord associated with this read. + * + * @param read a GATKSAMRecord * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match */ - public static final NGSPlatform fromRead(SAMRecord read) { - return fromReadGroup(read.getReadGroup()); - } - - /** - * Returns the NGSPlatform corresponding to the PL tag in the read group - * @param rg - * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match - */ - public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) { - if ( rg == null ) return UNKNOWN; - return fromReadGroupPL(rg.getPlatform()); + public static NGSPlatform fromRead(GATKSAMRecord read) { + return read.getReadGroup().getNGSPlatform(); } /** @@ -90,7 +79,7 @@ public enum NGSPlatform { * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object * @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match */ - public static final NGSPlatform fromReadGroupPL(final String plFromRG) { + public static NGSPlatform fromReadGroupPL(final String plFromRG) { if ( plFromRG == null ) return UNKNOWN; // todo -- algorithm could be
implemented more efficiently, as the list of all @@ -113,7 +102,7 @@ public enum NGSPlatform { * @param platform the read group string that describes the platform used * @return true if the platform is known (i.e. it's in the list and is not UNKNOWN) */ - public static final boolean isKnown (final String platform) { + public static final boolean isKnown(final String platform) { return fromReadGroupPL(platform) != UNKNOWN; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java index 26c95bffd..2ccc145f9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -28,12 +28,12 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; @@ -102,18 +102,18 @@ public class SampleUtils { public static Set getUniqueSamplesFromRods(GenomeAnalysisEngine toolkit, Collection rodNames) { Set samples = new LinkedHashSet(); - for ( VCFHeader header : VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).values() ) + for ( VCFHeader header : GATKVCFUtils.getVCFHeadersFromRods(toolkit, rodNames).values() ) samples.addAll(header.getGenotypeSamples()); return samples; } public static Set 
getRodNamesWithVCFHeader(GenomeAnalysisEngine toolkit, Collection rodNames) { - return VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).keySet(); + return GATKVCFUtils.getVCFHeadersFromRods(toolkit, rodNames).keySet(); } public static Set getSampleListWithVCFHeader(GenomeAnalysisEngine toolkit, Collection rodNames) { - return getSampleList(VCFUtils.getVCFHeadersFromRods(toolkit, rodNames)); + return getSampleList(GATKVCFUtils.getVCFHeadersFromRods(toolkit, rodNames)); } public static Set getSampleList(Map headers) { @@ -149,7 +149,7 @@ public class SampleUtils { // iterate to get all of the sample names - for ( Map.Entry pair : VCFUtils.getVCFHeadersFromRods(toolkit).entrySet() ) { + for ( Map.Entry pair : GATKVCFUtils.getVCFHeadersFromRods(toolkit).entrySet() ) { for ( String sample : pair.getValue().getGenotypeSamples() ) addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, pair.getKey()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 3966434c0..51753ecef 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -6,6 +6,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -37,6 +38,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; state[i] being wrong. 
*/ public class BAQ { + private final static Logger logger = Logger.getLogger(BAQ.class); private final static boolean DEBUG = false; public enum CalculationMode { @@ -179,8 +181,7 @@ public class BAQ { /*** initialization ***/ // change coordinates - int l_ref = ref.length; - + final int l_ref = ref.length; // set band width int bw2, bw = l_ref > l_query? l_ref : l_query; @@ -266,26 +267,6 @@ public class BAQ { s[l_query+1] = sum; // the last scaling factor } - //gdbebug+ -/* - double cac=0.; - // undo scaling of forward probabilities to obtain plain probability of observation given model - double[] su = new double[f[l_query].length]; - { - double sum = 0.; - double[] logs = new double[s.length]; - for (k=0; k < logs.length; k++) { - logs[k] = Math.log10(s[k]); - sum += logs[k]; - } - for (k=0; k < f[l_query].length; k++) - su[k]= Math.log10(f[l_query][k])+ sum; - - cac = MathUtils.softMax(su); - } - System.out.format("s:%f\n",cac); - // gdebug- - */ /*** backward ***/ // b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from) for (k = 1; k <= l_ref; ++k) { @@ -305,8 +286,8 @@ public class BAQ { for (k = end; k >= beg; --k) { int u, v11, v01, v10; u = set_u(bw, i, k); v11 = set_u(bw, i+1, k+1); v10 = set_u(bw, i+1, k); v01 = set_u(bw, i, k+1); - double e = (k >= l_ref? 0 : calcEpsilon(ref[k], qyi1, _iqual[qstart+i])) * bi1[v11]; - bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e. + final double e = (k >= l_ref? 0 : calcEpsilon(ref[k], qyi1, _iqual[qstart+i])) * bi1[v11]; + bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been folded into e. 
bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1]; bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y; } @@ -332,12 +313,12 @@ public class BAQ { /*** MAP ***/ for (i = 1; i <= l_query; ++i) { double sum = 0., max = 0.; - double[] fi = f[i], bi = b[i]; + final double[] fi = f[i], bi = b[i]; int beg = 1, end = l_ref, x, max_k = -1; x = i - bw; beg = beg > x? beg : x; x = i + bw; end = end < x? end : x; for (k = beg; k <= end; ++k) { - int u = set_u(bw, i, k); + final int u = set_u(bw, i, k); double z; sum += (z = fi[u+0] * bi[u+0]); if (z > max) { max = z; max_k = (k-1)<<2 | 0; } sum += (z = fi[u+1] * bi[u+1]); if (z > max) { max = z; max_k = (k-1)<<2 | 1; } @@ -531,7 +512,11 @@ public class BAQ { } } +// final SimpleTimer total = new SimpleTimer(); +// final SimpleTimer local = new SimpleTimer(); +// int n = 0; public BAQCalculationResult calcBAQFromHMM(byte[] ref, byte[] query, byte[] quals, int queryStart, int queryEnd ) { +// total.restart(); if ( queryStart < 0 ) throw new ReviewedStingException("BUG: queryStart < 0: " + queryStart); if ( queryEnd < 0 ) throw new ReviewedStingException("BUG: queryEnd < 0: " + queryEnd); if ( queryEnd < queryStart ) throw new ReviewedStingException("BUG: queryStart < queryEnd : " + queryStart + " end =" + queryEnd); @@ -539,7 +524,12 @@ public class BAQ { // note -- assumes ref is offset from the *CLIPPED* start BAQCalculationResult baqResult = new BAQCalculationResult(query, quals, ref); int queryLen = queryEnd - queryStart; +// local.restart(); hmm_glocal(baqResult.refBases, baqResult.readBases, queryStart, queryLen, baqResult.rawQuals, baqResult.state, baqResult.bq); +// local.stop(); +// total.stop(); +// if ( n++ % 100000 == 0 ) +// logger.info("n = " + n + ": Total " + total.getElapsedTimeNano() + " local " + local.getElapsedTimeNano()); return baqResult; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleFeature.java index 
0aa9ecba2..fc7df9f76 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleFeature.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.codecs.beagle; import org.broad.tribble.Feature; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.ArrayList; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapFeature.java index d0480a90b..2a36e8ad3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapFeature.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.codecs.hapmap; import org.broad.tribble.Feature; import org.broad.tribble.annotation.Strand; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.HashMap; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/ExpandingArrayList.java b/public/java/src/org/broadinstitute/sting/utils/collections/ExpandingArrayList.java index 04ef8ece3..abd4eeaba 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/ExpandingArrayList.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/ExpandingArrayList.java @@ -54,6 +54,7 @@ public class ExpandingArrayList extends ArrayList { private void maybeExpand(int index, E value) { if ( index >= size() ) { + ensureCapacity(index+1); // make sure we have space to hold at least index + 1 elements // We need to add null items until we can safely set index to element for ( int i = size(); i <= index; i++ ) add(value); diff --git 
a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java index 050ed52ac..890a9b488 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java @@ -58,13 +58,20 @@ public class NestedIntegerArray { int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE); - logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions))); - logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate)); + if ( logger.isDebugEnabled() ) logger.debug(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions))); + if ( logger.isDebugEnabled() ) logger.debug(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate)); data = new Object[dimensions[0]]; preallocateArray(data, 0, dimensionsToPreallocate); - logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate)); + if ( logger.isDebugEnabled() ) logger.debug(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate)); + } + + /** + * @return the dimensions of this nested integer array. 
DO NOT MODIFY + */ + public int[] getDimensions() { + return dimensions; } /** @@ -174,23 +181,23 @@ public class NestedIntegerArray { } } - public static class Leaf { + public static class Leaf { public final int[] keys; - public final Object value; + public final T value; - public Leaf(final int[] keys, final Object value) { + public Leaf(final int[] keys, final T value) { this.keys = keys; this.value = value; } } - public List getAllLeaves() { - final List result = new ArrayList(); + public List> getAllLeaves() { + final List> result = new ArrayList>(); fillAllLeaves(data, new int[0], result); return result; } - private void fillAllLeaves(final Object[] array, final int[] path, final List result) { + private void fillAllLeaves(final Object[] array, final int[] path, final List> result) { for ( int key = 0; key < array.length; key++ ) { final Object value = array[key]; if ( value == null ) @@ -199,7 +206,7 @@ public class NestedIntegerArray { if ( value instanceof Object[] ) { fillAllLeaves((Object[]) value, newPath, result); } else { - result.add(new Leaf(newPath, value)); + result.add(new Leaf(newPath, (T)value)); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 7ae575534..85e4a338a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.duplicates; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 
523fd5a97..b1410da27 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpUtils; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index 9bb0e646f..ff85c33a0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.io.PrintStream; import java.lang.reflect.Constructor; @@ -38,7 +38,7 @@ import java.util.*; public abstract class PerReadAlleleLikelihoodMap { - public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.1; + public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.2; protected List alleles; protected Map> likelihoodReadMap; @@ -126,7 +126,7 @@ public abstract class PerReadAlleleLikelihoodMap { public static PerReadAlleleLikelihoodMap getBestAvailablePerReadAlleleLikelihoodMap() { final Class PerReadAlleleLikelihoodMapClass = 
GATKLiteUtils.getProtectedClassIfAvailable(PerReadAlleleLikelihoodMap.class); try { - Constructor constructor = PerReadAlleleLikelihoodMapClass.getDeclaredConstructor((Class[])null); + final Constructor constructor = PerReadAlleleLikelihoodMapClass.getDeclaredConstructor((Class[])null); constructor.setAccessible(true); return (PerReadAlleleLikelihoodMap)constructor.newInstance(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/StandardPerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/StandardPerReadAlleleLikelihoodMap.java index 7db818592..1a27c6ecb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/StandardPerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/StandardPerReadAlleleLikelihoodMap.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.utils.genotyper; import org.broadinstitute.sting.utils.classloader.PublicPackageSource; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Allele; import java.io.PrintStream; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/EOFMarkedValue.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/EOFMarkedValue.java index d0ad51cb0..464ebfcd5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/EOFMarkedValue.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/EOFMarkedValue.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, 
publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.utils.nanoScheduler; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java index 0e0237412..0ccb2b8cc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java @@ -1,16 +1,40 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.utils.nanoScheduler; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.MultiThreadedErrorTracker; import java.util.Iterator; -import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; /** - * Producer Thread that reads input values from an inputReads and puts them into an output queue + * Helper class that allows multiple threads to read input values from + * an iterator, and tracks the number of items read from that iterator. */ -class InputProducer implements Runnable { +class InputProducer { private final static Logger logger = Logger.getLogger(InputProducer.class); /** @@ -18,13 +42,6 @@ class InputProducer implements Runnable { */ final Iterator inputReader; - /** - * Where we put our input values for consumption - */ - final BlockingQueue outputQueue; - - final MultiThreadedErrorTracker errorTracker; - /** * Have we read the last value from inputReader? * @@ -34,25 +51,20 @@ class InputProducer implements Runnable { */ boolean readLastValue = false; + /** + * Once we've readLastValue, lastValue contains a continually + * updating InputValue where EOF is true.
It's not necessarily + * a single value, as each read updates lastValue with the + * next EOF marker + */ + private InputValue lastValue = null; + int nRead = 0; int inputID = -1; - /** - * A latch used to block threads that want to start up only when all of the values - * in inputReader have been read by the thread executing run() - */ - final CountDownLatch latch = new CountDownLatch(1); - - public InputProducer(final Iterator inputReader, - final MultiThreadedErrorTracker errorTracker, - final BlockingQueue outputQueue) { + public InputProducer(final Iterator inputReader) { if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null"); - if ( errorTracker == null ) throw new IllegalArgumentException("errorTracker cannot be null"); - if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null"); - this.inputReader = inputReader; - this.errorTracker = errorTracker; - this.outputQueue = outputQueue; } /** @@ -82,9 +94,8 @@ class InputProducer implements Runnable { * This method is synchronized, as it manipulates local state accessed across multiple threads. * * @return the next input stream value, or null if the stream contains no more elements - * @throws InterruptedException */ - private synchronized InputType readNextItem() throws InterruptedException { + private synchronized InputType readNextItem() { if ( ! inputReader.hasNext() ) { // we are done, mark ourselves as such and return null readLastValue = true; @@ -100,49 +111,60 @@ class InputProducer implements Runnable { } /** - * Run this input producer, looping over all items in the input reader and - * enqueueing them as InputValues into the outputQueue. After the - * end of the stream has been encountered, any threads waiting because - * they called waitForDone() will be freed. + * Are there currently more values in the iterator? + * + * Note the word currently. 
It's possible that some already submitted + * job will read a value from this InputProducer, so in some sense + * there are no more values and in the future there'll be no next + * value. That said, once this returns false it means that all + * of the possible values have been read + * + * @return true if a future call to next might return a non-EOF value, false if + * the underlying iterator is definitely empty */ - public void run() { - try { - while ( true ) { - final InputType value = readNextItem(); - - if ( value == null ) { - if ( ! readLastValue ) - throw new IllegalStateException("value == null but readLastValue is false!"); - - // add the EOF object so our consumer knows we are done in all inputs - // note that we do not increase inputID here, so that variable indicates the ID - // of the last real value read from the queue - outputQueue.put(new InputValue(inputID + 1)); - break; - } else { - // add the actual value to the outputQueue - outputQueue.put(new InputValue(++inputID, value)); - } - } - - latch.countDown(); - } catch (Throwable ex) { - errorTracker.notifyOfError(ex); - } finally { -// logger.info("Exiting input thread readLastValue = " + readLastValue); - } + public synchronized boolean hasNext() { + return ! allInputsHaveBeenRead(); } /** - * Block until all of the items have been read from inputReader. + * Get the next InputValue from this producer. The next value is + * either (1) the next value from the iterator, in which case + * the return value is an InputValue containing that value, or (2) + * an InputValue with the EOF marker, indicating that the underlying + * iterator has been exhausted. * - * Note that this call doesn't actually read anything. You have to submit a thread - * to actually execute run() directly. + * This function never fails -- it can be called endlessly and + * while the underlying iterator has values it returns them, and then + * it returns a succession of EOF marking input values.
* - * @throws InterruptedException + * @return an InputValue containing the next value in the underlying + * iterator, or one with EOF marker, if the iterator is exhausted */ - public void waitForDone() throws InterruptedException { - latch.await(); + public synchronized InputValue next() { + if ( readLastValue ) { + // we read the last value, so our value is the next + // EOF marker based on the last value. Make sure to + // update the last value so the markers keep incrementing + // their job ids + lastValue = lastValue.nextEOF(); + return lastValue; + } else { + final InputType value = readNextItem(); + + if ( value == null ) { + if ( ! readLastValue ) + throw new IllegalStateException("value == null but readLastValue is false!"); + + // add the EOF object so our consumer knows we are done in all inputs + // note that we do not increase inputID here, so that variable indicates the ID + // of the last real value read from the queue + lastValue = new InputValue(inputID + 1); + return lastValue; + } else { + // add the actual value to the outputQueue + return new InputValue(++inputID, value); + } + } } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java index 83d671560..fd23b11d8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.utils.nanoScheduler; /** @@ -25,14 +50,6 @@ class MapResult extends EOFMarkedValue implements Comparable= 0"); } - /** - * Create the EOF marker version of MapResult - */ - MapResult() { - super(); - this.jobID = Integer.MAX_VALUE; - } - /** * @return the job ID of the map job that produced this MapResult */ diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResultsQueue.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResultsQueue.java new file mode 100644 index 000000000..ef74d669d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResultsQueue.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.nanoScheduler; + +import org.broadinstitute.sting.utils.collections.ExpandingArrayList; + +/** + * Created with IntelliJ IDEA. + * User: depristo + * Date: 12/19/12 + * Time: 3:53 PM + * + * This class makes some critical assumptions. First is that the jobID of the first + * job is 0. If this isn't true the MapResultsQueue will certainly fail. + */ +public class MapResultsQueue { + //private final static boolean DEBUG = false; + //private final static Logger logger = Logger.getLogger(MapResultsQueue.class); + + /** + * Although naturally stored as priority blocking queue, this is actually quite expensive + * due to the O(n log n) sorting calculation. Since we know that the job ids start + * at 0 and increment by 1 in each successive job, we store an array instead. The + * array is indexed by jobID, and contains the MapResult for that job id. Because elements + * can be added to the queue in any order, we need to use an expanding array list to + * store the elements. 
+ */ + final ExpandingArrayList> queue = new ExpandingArrayList>(10000); + + /** + * The jobID of the last job we've seen + */ + int prevJobID = -1; // no jobs observed + + /** + * Put mapResult into this MapResultsQueue, associated with its jobID + * @param mapResult a non-null map result + */ + public synchronized void put(final MapResult mapResult) { + if ( mapResult == null ) throw new IllegalArgumentException("mapResult cannot be null"); + + // make sure that nothing is at the job id for map + assert queue.size() < mapResult.getJobID() || queue.get(mapResult.getJobID()) == null; + + queue.set(mapResult.getJobID(), mapResult); + } + + /** + * Should we reduce the next value in the mapResultQueue? + * + * @return true if we should reduce + */ + public synchronized boolean nextValueIsAvailable() { + final MapResult nextMapResult = queue.get(nextJobID()); + + if ( nextMapResult == null ) { + // natural case -- the next job hasn't had a value added yet + return false; + } else if ( nextMapResult.getJobID() != nextJobID() ) { + // sanity check -- the job id at next isn't the one we expect + throw new IllegalStateException("Next job ID " + nextMapResult.getJobID() + " is not == previous job id " + prevJobID + " + 1"); + } else { + // there's a value at the next job id, so return true + return true; + } + } + + /** + * Get the next job ID we'd expect to see given our previous job id + * @return the next job id we'd fetch to reduce + */ + private int nextJobID() { + return prevJobID + 1; + } + + /** + * Can only be called when nextValueIsAvailable is true + * @return the MapResult stored at the next job ID + * @throws InterruptedException + */ + // TODO -- does this have to be synchronized?
-- I think the answer is no + public synchronized MapResult take() throws InterruptedException { + final MapResult result = queue.get(nextJobID()); + + // make sure the value we've fetched has the right id + assert result.getJobID() == nextJobID(); + + prevJobID = result.getJobID(); + queue.set(prevJobID, null); + + return result; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java index cc5335051..1311126d0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.utils.nanoScheduler; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java index 8b12c62c4..785a7f4fd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.utils.nanoScheduler; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java index 879a33a1d..8191b16c9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.utils.nanoScheduler; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java index 4cc91faa4..c3854eef2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java @@ -1,3 +1,28 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.utils.nanoScheduler; import com.google.java.contract.Ensures; @@ -43,14 +68,20 @@ import java.util.concurrent.*; public class NanoScheduler { private final static Logger logger = Logger.getLogger(NanoScheduler.class); private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true; - private final static boolean LOG_MAP_TIMES = false; + protected final static int UPDATE_PROGRESS_FREQ = 100; + /** + * Currently not used, but kept because it's conceptually reasonable to have a buffer + */ final int bufferSize; + + /** + * The number of threads we're using to execute the map jobs in this nano scheduler + */ final int nThreads; - final ExecutorService inputExecutor; + final ExecutorService masterExecutor; final ExecutorService mapExecutor; - final Semaphore runningMapJobSlots; final MultiThreadedErrorTracker errorTracker = new MultiThreadedErrorTracker(); boolean shutdown = false; @@ -75,14 +106,10 @@ public class NanoScheduler { this.nThreads = nThreads; if ( nThreads == 1 ) { - this.mapExecutor = this.inputExecutor = this.masterExecutor = null; - runningMapJobSlots = null; + this.mapExecutor = this.masterExecutor = null; } else { - this.mapExecutor = Executors.newFixedThreadPool(nThreads - 1, new NamedThreadFactory("NS-map-thread-%d")); - runningMapJobSlots = new Semaphore(this.bufferSize); - - this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d")); this.masterExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-master-thread-%d")); + this.mapExecutor = Executors.newFixedThreadPool(nThreads, new NamedThreadFactory("NS-map-thread-%d")); } } @@ -111,7 +138,6 @@ public class NanoScheduler { */ public void shutdown() { if ( nThreads > 1 ) { - shutdownExecutor("inputExecutor", inputExecutor); shutdownExecutor("mapExecutor", mapExecutor); shutdownExecutor("masterExecutor", masterExecutor); } @@ -247,8 +273,7 @@ public class NanoScheduler { // map final MapType mapValue =
map.apply(input); - if ( progressFunction != null ) - progressFunction.progress(input); + updateProgress(i++, input); // reduce sum = reduce.apply(mapValue, sum); @@ -258,6 +283,16 @@ public class NanoScheduler { return sum; } + /** + * Maybe update the progress meter (maybe because we don't want to do so so often that it costs cpu time) + * @param counter increasing counter to use to cut down on updates + * @param input the input we're currently at + */ + private void updateProgress(final int counter, final InputType input) { + if ( progressFunction != null && counter % UPDATE_PROGRESS_FREQ == 0 ) + progressFunction.progress(input); + } + /** * Efficient parallel version of Map/Reduce * @@ -323,7 +358,6 @@ public class NanoScheduler { if ( errorTracker.hasAnErrorOccurred() ) { masterExecutor.shutdownNow(); mapExecutor.shutdownNow(); - inputExecutor.shutdownNow(); errorTracker.throwErrorIfPending(); } } @@ -351,43 +385,26 @@ public class NanoScheduler { @Override public ReduceType call() { - // a blocking queue that limits the number of input datum to the requested buffer size - // note we need +1 because we continue to enqueue the lastObject - final BlockingQueue.InputValue> inputQueue - = new LinkedBlockingDeque.InputValue>(bufferSize+1); - // Create the input producer and start it running - final InputProducer inputProducer = - new InputProducer(inputReader, errorTracker, inputQueue); - inputExecutor.submit(inputProducer); + final InputProducer inputProducer = new InputProducer(inputReader); - // a priority queue that stores up to bufferSize elements - // produced by completed map jobs. - final PriorityBlockingQueue> mapResultQueue = - new PriorityBlockingQueue>(); + // create the MapResultsQueue to store results of map jobs. 
+ final MapResultsQueue mapResultQueue = new MapResultsQueue(); - final Reducer reducer - = new Reducer(reduce, errorTracker, initialValue); + // create the reducer we'll use for this nano scheduling run + final Reducer reducer = new Reducer(reduce, errorTracker, initialValue); + + final CountDownLatch runningMapJobs = new CountDownLatch(nThreads); try { - int nSubmittedJobs = 0; - - while ( continueToSubmitJobs(nSubmittedJobs, inputProducer) ) { - // acquire a slot to run a map job. Blocks if too many jobs are enqueued - runningMapJobSlots.acquire(); - - mapExecutor.submit(new MapReduceJob(inputQueue, mapResultQueue, map, reducer)); - nSubmittedJobs++; + // create and submit the info needed by the read/map/reduce threads to do their work + for ( int i = 0; i < nThreads; i++ ) { + mapExecutor.submit(new ReadMapReduceJob(inputProducer, mapResultQueue, runningMapJobs, map, reducer)); } - // mark the last job id we've submitted so we now the id to wait for - //logger.warn("setting jobs submitted to " + nSubmittedJobs); - reducer.setTotalJobCount(nSubmittedJobs); - // wait for all of the input and map threads to finish - return waitForCompletion(inputProducer, reducer); + return waitForCompletion(mapResultQueue, runningMapJobs, reducer); } catch (Throwable ex) { -// logger.warn("Reduce job got exception " + ex); errorTracker.notifyOfError(ex); return initialValue; } @@ -396,56 +413,40 @@ public class NanoScheduler { /** * Wait until the input thread and all map threads have completed running, and return the final reduce result */ - private ReduceType waitForCompletion(final InputProducer inputProducer, + private ReduceType waitForCompletion(final MapResultsQueue mapResultsQueue, + final CountDownLatch runningMapJobs, final Reducer reducer) throws InterruptedException { + // wait for all the map threads to finish by waiting on the runningMapJobs latch + runningMapJobs.await(); + + // do a final reduce here. 
This is critically important because the InputMapReduce jobs + // no longer block on reducing, so it's possible for all the threads to end with a few + // reduce jobs on the queue still to do. This call ensures that we reduce everything + reducer.reduceAsMuchAsPossible(mapResultsQueue, true); + // wait until we have a final reduce result -// logger.warn("waiting for final reduce"); - final ReduceType finalSum = reducer.waitForFinalReduce(); - - // now wait for the input provider thread to terminate -// logger.warn("waiting on inputProducer"); - inputProducer.waitForDone(); - - // wait for all the map threads to finish by acquiring and then releasing all map job semaphores -// logger.warn("waiting on map"); - runningMapJobSlots.acquire(bufferSize); - runningMapJobSlots.release(bufferSize); + final ReduceType finalSum = reducer.getReduceResult(); // everything is finally shutdown, return the final reduce value return finalSum; } - - /** - * Should we continue to submit jobs given the number of jobs already submitted and the - * number of read items in inputProducer? - * - * We continue to submit jobs while inputProducer hasn't reached EOF or the number - * of jobs we've enqueued isn't the number of read elements. This means that in - * some cases we submit more jobs than total read elements (cannot know because of - * multi-threading) so map jobs must handle the case where getNext() returns EOF. 
- * - * @param nJobsSubmitted - * @param inputProducer - * @return - */ - private boolean continueToSubmitJobs(final int nJobsSubmitted, final InputProducer inputProducer) { - final int nReadItems = inputProducer.getNumInputValues(); - return nReadItems == -1 || nJobsSubmitted < nReadItems; - } } - private class MapReduceJob implements Runnable { - final BlockingQueue.InputValue> inputQueue; - final PriorityBlockingQueue> mapResultQueue; + private class ReadMapReduceJob implements Runnable { + final InputProducer inputProducer; + final MapResultsQueue mapResultQueue; final NSMapFunction map; final Reducer reducer; + final CountDownLatch runningMapJobs; - private MapReduceJob(BlockingQueue.InputValue> inputQueue, - final PriorityBlockingQueue> mapResultQueue, - final NSMapFunction map, - final Reducer reducer) { - this.inputQueue = inputQueue; + private ReadMapReduceJob(final InputProducer inputProducer, + final MapResultsQueue mapResultQueue, + final CountDownLatch runningMapJobs, + final NSMapFunction map, + final Reducer reducer) { + this.inputProducer = inputProducer; this.mapResultQueue = mapResultQueue; + this.runningMapJobs = runningMapJobs; this.map = map; this.reducer = reducer; } @@ -453,43 +454,41 @@ public class NanoScheduler { @Override public void run() { try { - //debugPrint("Running MapReduceJob " + jobID); - final InputProducer.InputValue inputWrapper = inputQueue.take(); - final int jobID = inputWrapper.getId(); + boolean done = false; + while ( ! done ) { + // get the next item from the input producer + final InputProducer.InputValue inputWrapper = inputProducer.next(); - final MapResult result; - if ( ! 
inputWrapper.isEOFMarker() ) { - // just skip doing anything if we don't have work to do, which is possible - // because we don't necessarily know how much input there is when we queue - // up our jobs - final InputType input = inputWrapper.getValue(); + // depending on inputWrapper, actually do some work or not, putting result input result object + final MapResult result; + if ( ! inputWrapper.isEOFMarker() ) { + // just skip doing anything if we don't have work to do, which is possible + // because we don't necessarily know how much input there is when we queue + // up our jobs + final InputType input = inputWrapper.getValue(); - // map - final MapType mapValue = map.apply(input); + // actually execute the map + final MapType mapValue = map.apply(input); - // enqueue the result into the mapResultQueue - result = new MapResult(mapValue, jobID); + // enqueue the result into the mapResultQueue + result = new MapResult(mapValue, inputWrapper.getId()); - if ( progressFunction != null ) - progressFunction.progress(input); - } else { - // push back the EOF marker so other waiting threads can read it - inputQueue.put(inputWrapper.nextEOF()); + mapResultQueue.put(result); - // if there's no input we push empty MapResults with jobIDs for synchronization with Reducer - result = new MapResult(jobID); + // reduce as much as possible, without blocking, if another thread is already doing reduces + final int nReduced = reducer.reduceAsMuchAsPossible(mapResultQueue, false); + + updateProgress(inputWrapper.getId(), input); + } else { + done = true; + } } - - mapResultQueue.put(result); - - final int nReduced = reducer.reduceAsMuchAsPossible(mapResultQueue); } catch (Throwable ex) { -// logger.warn("Map job got exception " + ex); errorTracker.notifyOfError(ex); } finally { // we finished a map job, release the job queue semaphore - runningMapJobSlots.release(); + runningMapJobs.countDown(); } } } -} +} \ No newline at end of file diff --git 
a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java index 5cae28187..294065838 100644 --- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java +++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java @@ -1,39 +1,68 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.utils.nanoScheduler; import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MultiThreadedErrorTracker; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.PriorityBlockingQueue; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; /** - * Reducer supporting two-threaded reduce of the map/reduce. + * Reducer supporting multi-threaded reduce of the map/reduce. * - * The first thread, using the reduceAsMuchAsPossible function, actually reduces the data - * as it arrives in the blockingQueue. + * reduceAsMuchAsPossible is the key function. Multiple threads can call into this, providing + * the map results queue, and this class accumulates the result of calling reduce + * on the map results. reduceAsMuchAsPossible isn't directly synchronized, but manages multi-threading + * directly with a lock. Threads can request either to block on the reduce call until it can be + * executed, or immediately exit if the lock isn't available. That allows multi-threaded users + * to avoid piling up waiting to reduce while one thread is reducing. They can instead immediately + * leave to go do something else productive * - * The second thread, using the waitForFinalReduce, can block on this data structure - * until that all jobs have arrived and been reduced. - * - * The key function for communication here is setTotalJobCount(), which the thread that submits - * jobs that enqueue MapResults into the blocking queue must call ONCE to tell the - * Reducer the total number of jobs that have been submitted for map. When numOfSubmittedJobs - * have been processed, this class frees a latch that allows thread blocked on waitForFinalReduce to proceed. - * - * This thread reads from mapResultsQueue until the poison EOF object arrives. At each - * stage is calls reduce(value, sum).
The blocking mapResultQueue ensures that the - * queue waits until the mapResultQueue has a value to take. Then, it gets and waits - * until the map result Future has a value. + * @author depristo + * @since 2012 */ class Reducer { private final static Logger logger = Logger.getLogger(Reducer.class); - private final static int UNSET_NUM_SUBMITTED_JOBS = -2; - final CountDownLatch countDownLatch = new CountDownLatch(1); - final NSReduceFunction reduce; - final MultiThreadedErrorTracker errorTracker; + /** + * The reduce function to execute + */ + private final NSReduceFunction reduce; + + /** + * Used to communicate errors to the outer master thread + */ + private final MultiThreadedErrorTracker errorTracker; + + /** + * Lock used to protect the call reduceAsMuchAsPossible from race conditions + */ + private final Lock reduceLock = new ReentrantLock(); /** * The sum of the reduce function applied to all MapResults. After this Reducer @@ -41,18 +70,6 @@ class Reducer { */ ReduceType sum; - int numSubmittedJobs = UNSET_NUM_SUBMITTED_JOBS; // not yet set - - /** - * The jobID of the last job we've seen - */ - int prevJobID = -1; // no jobs observed - - /** - * A counter keeping track of the number of jobs we're reduced - */ - int numJobsReduced = 0; - /** * Create a new Reducer that will apply the reduce function with initialSum value * to values via reduceAsMuchAsPossible, timing the reduce function call costs with @@ -72,26 +89,6 @@ class Reducer { this.sum = initialSum; } - /** - * Should we reduce the next value in the mapResultQueue? 
- * - * @param mapResultQueue the queue of map results - * @return true if we should reduce - */ - @Requires("mapResultQueue != null") - private synchronized boolean reduceNextValueInQueue(final PriorityBlockingQueue> mapResultQueue) { - final MapResult nextMapResult = mapResultQueue.peek(); - if ( nextMapResult == null ) { - return false; - } else if ( nextMapResult.getJobID() < prevJobID + 1 ) { - throw new IllegalStateException("Next job ID " + nextMapResult.getJobID() + " is < previous job id " + prevJobID); - } else if ( nextMapResult.getJobID() == prevJobID + 1 ) { - return true; - } else { - return false; - } - } - /** * Reduce as much data as possible in mapResultQueue, returning the number of reduce calls completed * @@ -104,97 +101,69 @@ class Reducer { * @throws InterruptedException */ @Ensures("result >= 0") - public synchronized int reduceAsMuchAsPossible(final PriorityBlockingQueue> mapResultQueue) { + public int reduceAsMuchAsPossible(final MapResultsQueue mapResultQueue, final boolean waitForLock) { if ( mapResultQueue == null ) throw new IllegalArgumentException("mapResultQueue cannot be null"); int nReducesNow = 0; -// if ( numSubmittedJobs != UNSET_NUM_SUBMITTED_JOBS ) -// logger.warn(" maybeReleaseLatch " + numJobsReduced + " numSubmittedJobs " + numSubmittedJobs + " queue " + mapResultQueue.size()); + final boolean haveLock = acquireReduceLock(waitForLock); try { - while ( reduceNextValueInQueue(mapResultQueue) ) { - final MapResult result = mapResultQueue.take(); - prevJobID = result.getJobID(); + if ( haveLock ) { + while ( mapResultQueue.nextValueIsAvailable() ) { + final MapResult result = mapResultQueue.take(); - if ( ! result.isEOFMarker() ) { - nReducesNow++; + if ( ! 
result.isEOFMarker() ) { + nReducesNow++; - // apply reduce, keeping track of sum - sum = reduce.apply(result.getValue(), sum); + // apply reduce, keeping track of sum + sum = reduce.apply(result.getValue(), sum); + } } - - numJobsReduced++; - maybeReleaseLatch(); } } catch (Exception ex) { errorTracker.notifyOfError(ex); - countDownLatch.countDown(); + } finally { + if ( haveLock ) // if we acquired the lock, unlock it + releaseReduceLock(); } -// if ( numSubmittedJobs == UNSET_NUM_SUBMITTED_JOBS ) -// logger.warn(" maybeReleaseLatch " + numJobsReduced + " numSubmittedJobs " + numSubmittedJobs + " queue " + mapResultQueue.size()); return nReducesNow; } /** - * release the latch if appropriate + * Acquire the reduce lock, either returning immediately if not possible or blocking until the lock is available * - * Appropriate means we've seen the last job, or there's only a single job id + * @param blockUntilAvailable if true, we will block until the lock is available, otherwise we return immediately + * without acquiring the lock + * @return true if the lock has been acquired, false otherwise */ - private synchronized void maybeReleaseLatch() { - if ( numJobsReduced == numSubmittedJobs ) { - // either we've already seen the last one prevJobID == numSubmittedJobs or - // the last job ID is -1, meaning that no jobs were ever submitted - countDownLatch.countDown(); + protected boolean acquireReduceLock(final boolean blockUntilAvailable) { + if ( blockUntilAvailable ) { + reduceLock.lock(); + return true; + } else { + return reduceLock.tryLock(); } } /** - * For testing only + * Free the reduce lock. * - * @return true if latch is released + * Assumes that the invoking thread actually previously acquired the lock (it's a problem if not). 
*/ - protected synchronized boolean latchIsReleased() { - return countDownLatch.getCount() == 0; + protected void releaseReduceLock() { + reduceLock.unlock(); } /** - * Key function: tell this class the total number of jobs will provide data in the mapResultsQueue + * Get the current reduce result resulting from applying reduce(...) to all MapResult elements. * - * The total job count when we free threads blocked on waitForFinalReduce. When we see numOfSubmittedJobs - * MapResults from the queue, those threads are released. - * - * Until this function is called, those thread will block forever. The numOfSubmittedJobs has a few constraints. - * First, it must be >= 0. 0 indicates that in fact no jobs will ever be submitted (i.e., there's no - * data coming) so the latch should be opened immediately. If it's >= 1, we will wait until - * we see numOfSubmittedJobs jobs before freeing them. - * - * Note that we throw an IllegalStateException if this function is called twice. - * - * @param numOfSubmittedJobs int >= 0 indicating the total number of MapResults that will - * enqueue results into our queue - */ - public synchronized void setTotalJobCount(final int numOfSubmittedJobs) { - if ( numOfSubmittedJobs < 0 ) - throw new IllegalArgumentException("numOfSubmittedJobs must be >= 0, but saw " + numOfSubmittedJobs); - if ( this.numSubmittedJobs != UNSET_NUM_SUBMITTED_JOBS) - throw new IllegalStateException("setlastJobID called multiple times, but should only be called once"); - - //logger.warn("setTotalJobCount " + numJobsReduced + " numSubmitted " + numOfSubmittedJobs); - this.numSubmittedJobs = numOfSubmittedJobs; - maybeReleaseLatch(); - } - - /** - * Block until the last job has submitted its MapResult to our queue, and we've reduced it, and - * return the reduce result resulting from applying reduce(...) to all MapResult elements. + * Note that this method cannot know if future reduce calls are coming in. So it simply gets + * the current reduce result. 
It is up to the caller to know whether the returned value is + * a partial result, or the full final value * * @return the total reduce result across all jobs - * @throws InterruptedException */ - public ReduceType waitForFinalReduce() throws InterruptedException { - //logger.warn("waitForFinalReduce() " + numJobsReduced + " " + numSubmittedJobs); - countDownLatch.await(); - //logger.warn(" done waitForFinalReduce"); + public ReduceType getReduceResult() { return sum; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index ff274499b..7f323adae 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.pileup; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.fragments.FragmentCollection; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 8cba5ec23..429de4a0f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.pileup; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import 
org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java index b69283b9d..161335957 100755 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java @@ -145,7 +145,7 @@ public class ProgressMeter { private final SimpleTimer timer = new SimpleTimer(); private GenomeLoc maxGenomeLoc = null; - private String positionMessage = "starting"; + private Position position = new Position(PositionStatus.STARTING); private long nTotalRecordsProcessed = 0; final ProgressMeterDaemon progressMeterDaemon; @@ -234,9 +234,65 @@ public class ProgressMeter { this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed); // a pretty name for our position - this.positionMessage = maxGenomeLoc == null - ? "unmapped reads" - : String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart()); + this.position = maxGenomeLoc == null ? new Position(PositionStatus.IN_UNMAPPED_READS) : new Position(maxGenomeLoc); + } + + /** + * Describes the status of this position marker, such as starting up, done, in the unmapped reads, + * or somewhere on the genome + */ + private enum PositionStatus { + STARTING("Starting"), + DONE("done"), + IN_UNMAPPED_READS("unmapped reads"), + ON_GENOME(null); + + public final String message; + + private PositionStatus(String message) { + this.message = message; + } + } + + /** + * A pair of position status and the genome loc, if necessary. 
Used to get a + * status update message as needed, without the computational cost of formatting + * the genome loc string every time a progress notification happens (which is almost + * always not printed) + */ + private class Position { + final PositionStatus type; + final GenomeLoc maybeLoc; + + /** + * Create a position object of any type != ON_GENOME + * @param type + */ + @Requires({"type != null", "type != PositionStatus.ON_GENOME"}) + private Position(PositionStatus type) { + this.type = type; + this.maybeLoc = null; + } + + /** + * Create a position object of type ON_GENOME at genomeloc loc + * @param loc + */ + @Requires("loc != null") + private Position(GenomeLoc loc) { + this.type = PositionStatus.ON_GENOME; + this.maybeLoc = loc; + } + + /** + * @return a human-readable representation of this position + */ + private String getMessage() { + if ( type == PositionStatus.ON_GENOME ) + return maxGenomeLoc.getContig() + ":" + maxGenomeLoc.getStart(); + else + return type.message; + } } /** @@ -267,7 +323,7 @@ public class ProgressMeter { updateLoggerPrintFrequency(estTotalRuntime.getTimeInSeconds()); logger.info(String.format("%15s %5.2e %s %s %5.1f%% %s %s", - positionMessage, progressData.getUnitsProcessed()*1.0, elapsed, unitRate, + position.getMessage(), progressData.getUnitsProcessed()*1.0, elapsed, unitRate, 100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion)); } @@ -317,7 +373,7 @@ public class ProgressMeter { public void notifyDone(final long nTotalRecordsProcessed) { // print out the progress meter this.nTotalRecordsProcessed = nTotalRecordsProcessed; - this.positionMessage = "done"; + this.position = new Position(PositionStatus.DONE); printProgress(true); logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours", diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 
5d4020a07..567514f8c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.recalibration; import net.sf.samtools.SAMTag; import net.sf.samtools.SAMUtils; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; @@ -44,7 +45,8 @@ import java.io.File; */ public class BaseRecalibration { - private final static int MAXIMUM_RECALIBRATED_READ_LENGTH = 5000; + private static Logger logger = Logger.getLogger(BaseRecalibration.class); + private final static boolean TEST_CACHING = false; private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done) private final RecalibrationTables recalibrationTables; @@ -54,12 +56,8 @@ public class BaseRecalibration { private final int preserveQLessThan; private final boolean emitOriginalQuals; - // TODO -- was this supposed to be used somewhere? -// private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. 
-// static { -// for (int i = 0; i < EventType.values().length; i++) -// qualityScoreByFullCovariateKey[i] = new NestedHashMap(); -// } + private final NestedIntegerArray globalDeltaQs; + private final NestedIntegerArray deltaQReporteds; /** @@ -84,6 +82,44 @@ public class BaseRecalibration { this.disableIndelQuals = disableIndelQuals; this.preserveQLessThan = preserveQLessThan; this.emitOriginalQuals = emitOriginalQuals; + + logger.info("Calculating cached tables..."); + + // + // Create a NestedIntegerArray that maps from rgKey x errorModel -> double, + // where the double is the result of this calculation. The entire calculation can + // be done upfront, on initialization of this BaseRecalibration structure + // + final NestedIntegerArray byReadGroupTable = recalibrationTables.getReadGroupTable(); + globalDeltaQs = new NestedIntegerArray( byReadGroupTable.getDimensions() ); + logger.info("Calculating global delta Q table..."); + for ( NestedIntegerArray.Leaf leaf : byReadGroupTable.getAllLeaves() ) { + final int rgKey = leaf.keys[0]; + final int eventIndex = leaf.keys[1]; + final double globalDeltaQ = calculateGlobalDeltaQ(rgKey, EventType.eventFrom(eventIndex)); + globalDeltaQs.put(globalDeltaQ, rgKey, eventIndex); + } + + + // The calculation of the deltaQ report is constant. key[0] and key[1] are the read group and qual, respectively + // and globalDeltaQ is a constant for the read group. So technically the delta Q reported is simply a lookup + // into a matrix indexed by rgGroup, qual, and event type. + // the code below actually creates this cache with a NestedIntegerArray calling into the actual + // calculateDeltaQReported code. 
+ final NestedIntegerArray byQualTable = recalibrationTables.getQualityScoreTable(); + deltaQReporteds = new NestedIntegerArray( byQualTable.getDimensions() ); + logger.info("Calculating delta Q reported table..."); + for ( NestedIntegerArray.Leaf leaf : byQualTable.getAllLeaves() ) { + final int rgKey = leaf.keys[0]; + final int qual = leaf.keys[1]; + final int eventIndex = leaf.keys[2]; + final EventType event = EventType.eventFrom(eventIndex); + final double globalDeltaQ = getGlobalDeltaQ(rgKey, event); + final double deltaQReported = calculateDeltaQReported(rgKey, qual, event, globalDeltaQ, (byte)qual); + deltaQReporteds.put(deltaQReported, rgKey, qual, eventIndex); + } + + logger.info("done calculating cache"); } /** @@ -91,6 +127,18 @@ public class BaseRecalibration { * * It updates the base qualities of the read with the new recalibrated qualities (for all event types) * + * Implements a serial recalibration of the reads using the combinational table. + * First, we perform a positional recalibration, and then a subsequent dinuc correction. + * + * Given the full recalibration table, we perform the following preprocessing steps: + * + * - calculate the global quality score shift across all data [DeltaQ] + * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift + * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual + * - The final shift equation is: + * + * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... 
) + * * @param read the read to recalibrate */ public void recalibrateRead(final GATKSAMRecord read) { @@ -103,6 +151,7 @@ public class BaseRecalibration { } final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates); + final int readLength = read.getReadLength(); for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { @@ -111,58 +160,88 @@ public class BaseRecalibration { } final byte[] quals = read.getBaseQualities(errorModel); - final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); // get the keyset for this base using the error model - final int readLength = read.getReadLength(); + // get the keyset for this base using the error model + final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel); + + // the rg key is constant over the whole read, the global deltaQ is too + final int rgKey = fullReadKeySet[0][0]; + + final double globalDeltaQ = getGlobalDeltaQ(rgKey, errorModel); + for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read + final byte origQual = quals[offset]; - final byte originalQualityScore = quals[offset]; + // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) + if ( origQual >= preserveQLessThan ) { + // get the keyset for this base using the error model + final int[] keySet = fullReadKeySet[offset]; + final double deltaQReported = getDeltaQReported(keySet[0], keySet[1], errorModel, globalDeltaQ); + final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, keySet, errorModel, globalDeltaQ, deltaQReported, origQual); + + // calculate the recalibrated qual using the BQSR formula + double recalibratedQualDouble = origQual + globalDeltaQ + deltaQReported + deltaQCovariates; + + // recalibrated quality is bound between 1 and MAX_QUAL + final byte recalibratedQual = 
QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), QualityUtils.MAX_RECALIBRATED_Q_SCORE); + + // return the quantized version of the recalibrated quality + final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual); - if (originalQualityScore >= preserveQLessThan) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality) - final int[] keySet = fullReadKeySet[offset]; // get the keyset for this base using the error model - final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base quals[offset] = recalibratedQualityScore; } } + + // finally update the base qualities in the read read.setBaseQualities(quals, errorModel); } } + private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) { + final Double cached = globalDeltaQs.get(rgKey, errorModel.index); - /** - * Implements a serial recalibration of the reads using the combinational table. - * First, we perform a positional recalibration, and then a subsequent dinuc correction. - * - * Given the full recalibration table, we perform the following preprocessing steps: - * - * - calculate the global quality score shift across all data [DeltaQ] - * - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift - * -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual - * - The final shift equation is: - * - * Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... 
) - * - * @param key The list of Comparables that were calculated from the covariates - * @param errorModel the event type - * @return A recalibrated quality score as a byte - */ - private byte performSequentialQualityCalculation(final int[] key, final EventType errorModel) { + if ( TEST_CACHING ) { + final double calcd = calculateGlobalDeltaQ(rgKey, errorModel); + if ( calcd != cached ) + throw new IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + errorModel); + } - final byte qualFromRead = (byte)(long)key[1]; - final double globalDeltaQ = calculateGlobalDeltaQ(recalibrationTables.getReadGroupTable(), key, errorModel); - final double deltaQReported = calculateDeltaQReported(recalibrationTables.getQualityScoreTable(), key, errorModel, globalDeltaQ, qualFromRead); - final double deltaQCovariates = calculateDeltaQCovariates(recalibrationTables, key, errorModel, globalDeltaQ, deltaQReported, qualFromRead); - - double recalibratedQual = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; // calculate the recalibrated qual using the BQSR formula - recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQual), QualityUtils.MAX_RECALIBRATED_Q_SCORE); // recalibrated quality is bound between 1 and MAX_QUAL - - return quantizationInfo.getQuantizedQuals().get((int) recalibratedQual); // return the quantized version of the recalibrated quality + return cachedWithDefault(cached); } - private double calculateGlobalDeltaQ(final NestedIntegerArray table, final int[] key, final EventType errorModel) { + private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) { + final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index); + + if ( TEST_CACHING ) { + final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey); + if ( calcd != cached ) + throw new 
IllegalStateException("calculated " + calcd + " and cached " + cached + " global delta q not equal at " + rgKey + " / " + qualKey + " / " + errorModel); + } + + return cachedWithDefault(cached); + } + + /** + * @param d a Double (that may be null) that is the result of a delta Q calculation + * @return a double == d if d != null, or 0.0 if it is + */ + private double cachedWithDefault(final Double d) { + return d == null ? 0.0 : d; + } + + /** + * Note that this calculation is a constant for each rgKey and errorModel. We need only + * compute this value once for all data. + * + * @param rgKey + * @param errorModel + * @return + */ + private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) { double result = 0.0; - final RecalDatum empiricalQualRG = table.get(key[0], errorModel.index); + final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index); + if (empiricalQualRG != null) { final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality(); final double aggregrateQReported = empiricalQualRG.getEstimatedQReported(); @@ -172,10 +251,10 @@ public class BaseRecalibration { return result; } - private double calculateDeltaQReported(final NestedIntegerArray table, final int[] key, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) { + private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) { double result = 0.0; - final RecalDatum empiricalQualQS = table.get(key[0], key[1], errorModel.index); + final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index); if (empiricalQualQS != null) { final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality(); result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ; @@ -192,12 +271,28 @@ public class BaseRecalibration { if (key[i] < 0) continue; - final 
RecalDatum empiricalQualCO = recalibrationTables.getTable(i).get(key[0], key[1], key[i], errorModel.index); - if (empiricalQualCO != null) { - final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); - result += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported)); - } + result += calculateDeltaQCovariate(recalibrationTables.getTable(i), + key[0], key[1], key[i], errorModel, + globalDeltaQ, deltaQReported, qualFromRead); } + return result; } + + private double calculateDeltaQCovariate(final NestedIntegerArray table, + final int rgKey, + final int qualKey, + final int tableKey, + final EventType errorModel, + final double globalDeltaQ, + final double deltaQReported, + final byte qualFromRead) { + final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index); + if (empiricalQualCO != null) { + final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality(); + return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported); + } else { + return 0.0; + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java index f3644fdd8..e0c1261fe 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/QuantizationInfo.java @@ -67,8 +67,13 @@ public class QuantizationInfo { return quantizationLevels; } - public GATKReportTable generateReportTable() { - GATKReportTable quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); + public GATKReportTable generateReportTable(boolean sortBycols) { + GATKReportTable quantizedTable; + if(sortBycols) { + quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true); + } else { + quantizedTable = 
new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3); + } quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME); quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME); quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME); diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java index 2b682f84b..4ddcb2b92 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/ReadCovariates.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.recalibration; -import java.util.Arrays; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.LRUCache; /** * The object temporarily held by a read that describes all of it's covariates. @@ -11,12 +12,47 @@ import java.util.Arrays; * @since 2/8/12 */ public class ReadCovariates { + private final static Logger logger = Logger.getLogger(ReadCovariates.class); + + /** + * How big should we let the LRU cache grow + */ + private static final int LRU_CACHE_SIZE = 500; + + /** + * Use an LRU cache to keep cache of keys (int[][][]) arrays for each read length we've seen. + * The cache allows us to avoid the expense of recreating these arrays for every read. The LRU + * keeps the total number of cached arrays to less than LRU_CACHE_SIZE. 
+ * + * This is a thread local variable, so the total memory required may grow to N_THREADS x LRU_CACHE_SIZE + */ + private final static ThreadLocal> keysCache = new ThreadLocal>() { + @Override protected LRUCache initialValue() { + return new LRUCache(LRU_CACHE_SIZE); + } + }; + + /** + * Our keys, indexed by event type x read length x covariate + */ private final int[][][] keys; + /** + * The index of the current covariate, used by addCovariate + */ private int currentCovariateIndex = 0; public ReadCovariates(final int readLength, final int numberOfCovariates) { - keys = new int[EventType.values().length][readLength][numberOfCovariates]; + final LRUCache cache = keysCache.get(); + final int[][][] cachedKeys = cache.get(readLength); + if ( cachedKeys == null ) { + // There's no cached value for read length so we need to create a new int[][][] array + if ( logger.isDebugEnabled() ) logger.debug("Keys cache miss for length " + readLength + " cache size " + cache.size()); + keys = new int[EventType.values().length][readLength][numberOfCovariates]; + cache.put(readLength, keys); + } else { + keys = cachedKeys; + } } public void setCovariateIndex(final int index) { @@ -24,22 +60,26 @@ public class ReadCovariates { } /** - * Necessary due to bug in BaseRecalibration recalibrateRead function. 
It is clearly seeing space it's not supposed to - * @return + * Update the keys for mismatch, insertion, and deletion for the current covariate at read offset + * + * @param mismatch the mismatch key value + * @param insertion the insertion key value + * @param deletion the deletion key value + * @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates */ - public ReadCovariates clear() { - for ( int i = 0; i < keys.length; i++ ) - for ( int j = 0; j < keys[i].length; j++) - Arrays.fill(keys[i][j], 0); - return this; - } - public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) { keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch; keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion; keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion; } + /** + * Get the keys for all covariates at read position for error model + * + * @param readPosition + * @param errorModel + * @return + */ public int[] getKeySet(final int readPosition, final EventType errorModel) { return keys[errorModel.index][readPosition]; } @@ -48,21 +88,12 @@ public class ReadCovariates { return keys[errorModel.index]; } - public int[] getMismatchesKeySet(final int readPosition) { - return keys[EventType.BASE_SUBSTITUTION.index][readPosition]; - } + // ---------------------------------------------------------------------- + // + // routines for testing + // + // ---------------------------------------------------------------------- - public int[] getInsertionsKeySet(final int readPosition) { - return keys[EventType.BASE_INSERTION.index][readPosition]; - } - - public int[] getDeletionsKeySet(final int readPosition) { - return keys[EventType.BASE_DELETION.index][readPosition]; - } - - /** - * Testing routines - */ protected int[][] getMismatchesKeySet() { return keys[EventType.BASE_SUBSTITUTION.index]; } @@ -74,4 
+105,16 @@ public class ReadCovariates { protected int[][] getDeletionsKeySet() { return keys[EventType.BASE_DELETION.index]; } + + protected int[] getMismatchesKeySet(final int readPosition) { + return getKeySet(readPosition, EventType.BASE_SUBSTITUTION); + } + + protected int[] getInsertionsKeySet(final int readPosition) { + return getKeySet(readPosition, EventType.BASE_INSERTION); + } + + protected int[] getDeletionsKeySet(final int readPosition) { + return getKeySet(readPosition, EventType.BASE_DELETION); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index 207988749..1ab3b10c4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -135,14 +135,6 @@ public class RecalDatum { this.estimatedQReported = estimatedQReported; } - public static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { - final Random random = new Random(); - final int nObservations = random.nextInt(maxObservations); - final int nErrors = random.nextInt(maxErrors); - final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); - return new RecalDatum(nObservations, nErrors, (byte)qual); - } - public final double getEstimatedQReported() { return estimatedQReported; } @@ -191,9 +183,9 @@ public class RecalDatum { return (byte)(Math.round(getEmpiricalQuality())); } - //--------------------------------------------------------------------------------------------------------------- + //--------------------------------------------------------------------------------------------------------------- // - // increment methods + // toString methods // //--------------------------------------------------------------------------------------------------------------- @@ -206,73 +198,56 @@ public class RecalDatum { return 
String.format("%s,%.2f,%.2f", toString(), getEstimatedQReported(), getEmpiricalQuality() - getEstimatedQReported()); } -// /** -// * We don't compare the estimated quality reported because it may be different when read from -// * report tables. -// * -// * @param o the other recal datum -// * @return true if the two recal datums have the same number of observations, errors and empirical quality. -// */ -// @Override -// public boolean equals(Object o) { -// if (!(o instanceof RecalDatum)) -// return false; -// RecalDatum other = (RecalDatum) o; -// return super.equals(o) && -// MathUtils.compareDoubles(this.empiricalQuality, other.empiricalQuality, 0.001) == 0; -// } - //--------------------------------------------------------------------------------------------------------------- // // increment methods // //--------------------------------------------------------------------------------------------------------------- - public double getNumObservations() { + public final double getNumObservations() { return numObservations; } - public synchronized void setNumObservations(final double numObservations) { + public final synchronized void setNumObservations(final double numObservations) { if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0"); this.numObservations = numObservations; empiricalQuality = UNINITIALIZED; } - public double getNumMismatches() { + public final double getNumMismatches() { return numMismatches; } @Requires({"numMismatches >= 0"}) - public synchronized void setNumMismatches(final double numMismatches) { + public final synchronized void setNumMismatches(final double numMismatches) { if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0"); this.numMismatches = numMismatches; empiricalQuality = UNINITIALIZED; } @Requires({"by >= 0"}) - public synchronized void incrementNumObservations(final double by) { + public final synchronized void incrementNumObservations(final double by) { numObservations 
+= by; empiricalQuality = UNINITIALIZED; } @Requires({"by >= 0"}) - public synchronized void incrementNumMismatches(final double by) { + public final synchronized void incrementNumMismatches(final double by) { numMismatches += by; empiricalQuality = UNINITIALIZED; } @Requires({"incObservations >= 0", "incMismatches >= 0"}) @Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"}) - public synchronized void increment(final double incObservations, final double incMismatches) { - incrementNumObservations(incObservations); - incrementNumMismatches(incMismatches); + public final synchronized void increment(final double incObservations, final double incMismatches) { + numObservations += incObservations; + numMismatches += incMismatches; + empiricalQuality = UNINITIALIZED; } @Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"}) - public synchronized void increment(final boolean isError) { - incrementNumObservations(1); - if ( isError ) - incrementNumMismatches(1); + public final synchronized void increment(final boolean isError) { + increment(1, isError ? 
1 : 0.0); } // ------------------------------------------------------------------------------------- @@ -286,7 +261,7 @@ public class RecalDatum { */ @Requires("empiricalQuality == UNINITIALIZED") @Ensures("empiricalQuality != UNINITIALIZED") - private synchronized final void calcEmpiricalQuality() { + private synchronized void calcEmpiricalQuality() { final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate()); empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index 7e90d98b9..d4e781fdd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.recalibration.covariates.*; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -257,11 +257,10 @@ public class RecalUtils { } } - private static List generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { + private static List generateReportTables(final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, boolean sortByCols) { List result = new LinkedList(); int reportTableIndex = 0; int rowIndex = 0; - final Map covariateNameMap = new HashMap(requestedCovariates.length); for (final Covariate covariate : requestedCovariates) 
covariateNameMap.put(covariate, parseCovariateName(covariate)); @@ -287,7 +286,11 @@ public class RecalUtils { final GATKReportTable reportTable; if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) { - reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size()); + if(sortByCols) { + reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true); + } else { + reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size()); + } for (final Pair columnName : columnNames) reportTable.addColumn(columnName.getFirst(), columnName.getSecond()); rowIndex = 0; // reset the row index since we're starting with a new table @@ -334,8 +337,8 @@ public class RecalUtils { return covariate.getClass().getSimpleName().split("Covariate")[0]; } - public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates) { - outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), RAC.RECAL_TABLE); + public static void outputRecalibrationReport(final RecalibrationArgumentCollection RAC, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, boolean sortByCols) { + outputRecalibrationReport(RAC.generateReportTable(covariateNames(requestedCovariates)), quantizationInfo.generateReportTable(sortByCols), generateReportTables(recalibrationTables, requestedCovariates, sortByCols), RAC.RECAL_TABLE); } /** @@ -351,8 +354,8 @@ public class RecalUtils { return Utils.join(",", names); } - public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final 
RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile) { - outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(), generateReportTables(recalibrationTables, requestedCovariates), outputFile); + public static void outputRecalibrationReport(final GATKReportTable argumentTable, final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final PrintStream outputFile, boolean sortByCols) { + outputRecalibrationReport(argumentTable, quantizationInfo.generateReportTable(sortByCols), generateReportTables(recalibrationTables, requestedCovariates, sortByCols), outputFile); } private static void outputRecalibrationReport(final GATKReportTable argumentTable, final GATKReportTable quantizationTable, final List recalTables, final PrintStream outputFile) { @@ -766,4 +769,28 @@ public class RecalUtils { return base; } } + + /** + * Combines the recalibration data for table1 and table2 into table1 + * + * Note that table1 is the destination, so it is modified + * + * @param table1 the destination table to merge table2 into + * @param table2 the source table to merge into table1 + */ + public static void combineTables(final NestedIntegerArray table1, final NestedIntegerArray table2) { + if ( table1 == null ) throw new IllegalArgumentException("table1 cannot be null"); + if ( table2 == null ) throw new IllegalArgumentException("table2 cannot be null"); + if ( ! 
Arrays.equals(table1.getDimensions(), table2.getDimensions())) + throw new IllegalArgumentException("Table1 " + Utils.join(",", table1.getDimensions()) + " not equal to " + Utils.join(",", table2.getDimensions())); + + for (final NestedIntegerArray.Leaf row : table2.getAllLeaves()) { + final RecalDatum myDatum = table1.get(row.keys); + + if (myDatum == null) + table1.put(row.value, row.keys); + else + myDatum.combine(row.value); + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index 527306c85..4ff17f302 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -81,7 +81,7 @@ public class RecalibrationReport { /** * Counts the number of unique read groups in the table * - * @param reportTable the GATKReport table containing data for this table + * @param reportTable the GATKReport table containing data for this table * @return the number of unique read groups */ private int countReadGroups(final GATKReportTable reportTable) { @@ -105,19 +105,10 @@ public class RecalibrationReport { * @param other the recalibration report to combine with this one */ public void combine(final RecalibrationReport other) { - for ( int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++ ) { final NestedIntegerArray myTable = recalibrationTables.getTable(tableIndex); final NestedIntegerArray otherTable = other.recalibrationTables.getTable(tableIndex); - - for (final NestedIntegerArray.Leaf row : otherTable.getAllLeaves()) { - final RecalDatum myDatum = myTable.get(row.keys); - - if (myDatum == null) - myTable.put((RecalDatum)row.value, row.keys); - else - myDatum.combine((RecalDatum)row.value); - } + RecalUtils.combineTables(myTable, otherTable); } } @@ -304,6 +295,9 @@ public class 
RecalibrationReport { else if (argument.equals("binary_tag_name")) RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value; + + else if (argument.equals("sort_by_all_columns")) + RAC.SORT_BY_ALL_COLUMNS = Boolean.parseBoolean((String) value); } return RAC; @@ -318,7 +312,7 @@ public class RecalibrationReport { } public void output(PrintStream output) { - RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output); + RecalUtils.outputRecalibrationReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, output, RAC.SORT_BY_ALL_COLUMNS); } public RecalibrationArgumentCollection getRAC() { diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java index 0dd510245..3f968d7f6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationTables.java @@ -25,11 +25,13 @@ package org.broadinstitute.sting.utils.recalibration; +import com.google.java.contract.Ensures; import org.broadinstitute.sting.utils.collections.LoggingNestedIntegerArray; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import java.io.PrintStream; +import java.util.ArrayList; /** * Utility class to facilitate on-the-fly base quality score recalibration. 
@@ -38,8 +40,7 @@ import java.io.PrintStream; * Date: 6/20/12 */ -public class RecalibrationTables { - +public final class RecalibrationTables { public enum TableType { READ_GROUP_TABLE(0), QUALITY_SCORE_TABLE(1), @@ -52,49 +53,82 @@ public class RecalibrationTables { } } - private final NestedIntegerArray[] tables; + private final ArrayList> tables; + private final int qualDimension; + private final int eventDimension = EventType.values().length; + private final int numReadGroups; + private final PrintStream log; public RecalibrationTables(final Covariate[] covariates) { this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1, null); } - public RecalibrationTables(final Covariate[] covariates, final PrintStream log) { - this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1, log); - } - public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) { this(covariates, numReadGroups, null); } public RecalibrationTables(final Covariate[] covariates, final int numReadGroups, final PrintStream log) { - tables = new NestedIntegerArray[covariates.length]; + tables = new ArrayList>(covariates.length); + for ( int i = 0; i < covariates.length; i++ ) + tables.add(i, null); // initialize so we can set below - final int qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1; - final int eventDimension = EventType.values().length; + qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1; + this.numReadGroups = numReadGroups; + this.log = log; + + tables.set(TableType.READ_GROUP_TABLE.index, + log == null ? new NestedIntegerArray(numReadGroups, eventDimension) : + new LoggingNestedIntegerArray(log, "READ_GROUP_TABLE", numReadGroups, eventDimension)); + + tables.set(TableType.QUALITY_SCORE_TABLE.index, makeQualityScoreTable()); - tables[TableType.READ_GROUP_TABLE.index] = log == null ? 
new NestedIntegerArray(numReadGroups, eventDimension) : - new LoggingNestedIntegerArray(log, "READ_GROUP_TABLE", numReadGroups, eventDimension); - tables[TableType.QUALITY_SCORE_TABLE.index] = log == null ? new NestedIntegerArray(numReadGroups, qualDimension, eventDimension) : - new LoggingNestedIntegerArray(log, "QUALITY_SCORE_TABLE", numReadGroups, qualDimension, eventDimension); for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < covariates.length; i++) - tables[i] = log == null ? new NestedIntegerArray(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) : - new LoggingNestedIntegerArray(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.index + 1), - numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension); + tables.set(i, + log == null ? new NestedIntegerArray(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) : + new LoggingNestedIntegerArray(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.index + 1), + numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension)); } + @Ensures("result != null") public NestedIntegerArray getReadGroupTable() { - return (NestedIntegerArray)tables[TableType.READ_GROUP_TABLE.index]; + return getTable(TableType.READ_GROUP_TABLE.index); } + @Ensures("result != null") public NestedIntegerArray getQualityScoreTable() { - return (NestedIntegerArray)tables[TableType.QUALITY_SCORE_TABLE.index]; + return getTable(TableType.QUALITY_SCORE_TABLE.index); } + @Ensures("result != null") public NestedIntegerArray getTable(final int index) { - return (NestedIntegerArray)tables[index]; + return tables.get(index); } + @Ensures("result >= 0") public int numTables() { - return tables.length; + return tables.size(); + } + + /** + * Allocate a new quality score table, based on requested parameters + * in this set of tables, without any data in 
it. The return result + * of this table is suitable for acting as a thread-local cache + * for quality score values + * @return a newly allocated, empty read group x quality score table + */ + public NestedIntegerArray makeQualityScoreTable() { + return log == null + ? new NestedIntegerArray(numReadGroups, qualDimension, eventDimension) + : new LoggingNestedIntegerArray(log, "QUALITY_SCORE_TABLE", numReadGroups, qualDimension, eventDimension); + } + + /** + * Merge in the quality score table information from qualityScoreTable into this + * recalibration table's quality score table. + * + * @param qualityScoreTable the quality score table we want to merge in + */ + public void combineQualityScoreTable(final NestedIntegerArray qualityScoreTable) { + RecalUtils.combineTables(getQualityScoreTable(), qualityScoreTable); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java index 5e470b35f..6619c24eb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ContextCovariate.java @@ -26,13 +26,13 @@ package org.broadinstitute.sting.utils.recalibration.covariates; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import 
org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; @@ -99,9 +99,21 @@ public class ContextCovariate implements StandardCovariate { final ArrayList indelKeys = contextWith(bases, indelsContextSize, indelsKeyMask); final int readLength = bases.length; + + // this is necessary to ensure that we don't keep historical data in the ReadCovariates values + // since the context covariate may not span the entire set of values in read covariates + // due to the clipping of the low quality bases + if ( readLength != originalBases.length ) { + // don't bother zeroing out if we are going to overwrite the whole array + for ( int i = 0; i < originalBases.length; i++ ) + // this base has been clipped off, so zero out the covariate values here + values.addCovariate(0, 0, 0, i); + } + for (int i = 0; i < readLength; i++) { + final int readOffset = (negativeStrand ? readLength - i - 1 : i); final int indelKey = indelKeys.get(i); - values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, (negativeStrand ? 
readLength - i - 1 : i)); + values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, readOffset); } // put the original bases back in diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java index a9b6c7152..6bff833e4 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates; import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java index 29c15adf7..47f11312a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/ReadGroupCovariate.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.utils.recalibration.covariates; -import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.HashMap; +import java.util.Map; 
+import java.util.Set; /* * Copyright (c) 2009 The Broad Institute @@ -77,6 +79,14 @@ public class ReadGroupCovariate implements RequiredCovariate { return keyForReadGroup((String) value); } + /** + * Get the mapping from read group names to integer key values for all read groups in this covariate + * @return a set of mappings from read group names -> integer key values + */ + public Set> getKeyMap() { + return readGroupLookupTable.entrySet(); + } + private int keyForReadGroup(final String readGroupId) { // Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR), // synchronize only the table updates. diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java index d4e4ab65e..36352f806 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java @@ -1,11 +1,9 @@ package org.broadinstitute.sting.utils.recalibration.covariates; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; -import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.TandemRepeat; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import java.util.Arrays; @@ -29,9 +27,9 @@ public class RepeatLengthCovariate implements ExperimentalCovariate { int maxRL = 0; for (int str = 1; str <= 8; str++) { if (i + str <= readBytes.length) { - maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions( - Arrays.copyOfRange(readBytes,i,i + 
str), - Arrays.copyOfRange(readBytes,i,readBytes.length) + maxRL = Math.max(maxRL, GATKVariantContextUtils.findNumberofRepetitions( + Arrays.copyOfRange(readBytes, i, i + str), + Arrays.copyOfRange(readBytes, i, readBytes.length) )); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 585578958..7ef05edd8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 0859957a3..77cf500f2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -188,6 +188,7 @@ public class ArtificialSAMUtils { GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); rec.setReadBases(bases); rec.setBaseQualities(qual); + rec.setReadGroup(new GATKSAMReadGroupRecord("x")); if (refIndex == -1) { rec.setReadUnmappedFlag(true); } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index 849a7ddee..5f70ced92 100755 --- 
a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -12,9 +12,6 @@ import org.broadinstitute.sting.utils.NGSPlatform; * */ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { - - public static final String LANE_TAG = "LN"; - // the SAMReadGroupRecord data we're caching private String mSample = null; private String mPlatform = null; @@ -33,46 +30,14 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { super(record.getReadGroupId(), record); } - public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) { - super(record.getReadGroupId(), record); - setPlatform(pl.getDefaultPlatform()); - mNGSPlatform = pl; - retrievedPlatform = retrievedNGSPlatform = true; - } - - /////////////////////////////////////////////////////////////////////////////// - // *** The following methods are overloaded to cache the appropriate data ***// - /////////////////////////////////////////////////////////////////////////////// - - public String getSample() { - if ( !retrievedSample ) { - mSample = super.getSample(); - retrievedSample = true; - } - return mSample; - } - - public void setSample(String s) { - super.setSample(s); - mSample = s; - retrievedSample = true; - } - - public String getPlatform() { - if ( !retrievedPlatform ) { - mPlatform = super.getPlatform(); - retrievedPlatform = true; - } - return mPlatform; - } - - public void setPlatform(String s) { - super.setPlatform(s); - mPlatform = s; - retrievedPlatform = true; - retrievedNGSPlatform = false; // recalculate the NGSPlatform - } - + /** + * Get the NGSPlatform enum telling us the platform of this read group + * + * This function call is caching, so subsequent calls to it are free, while + * the first time it's called there's a bit of work to resolve the enum + * + * @return an NGSPlatform enum value + */ public NGSPlatform getNGSPlatform() { if ( ! 
retrievedNGSPlatform ) { mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform()); @@ -82,11 +47,40 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { return mNGSPlatform; } - public String getLane() { - return this.getAttribute(LANE_TAG); + /////////////////////////////////////////////////////////////////////////////// + // *** The following methods are overloaded to cache the appropriate data ***// + /////////////////////////////////////////////////////////////////////////////// + + @Override + public String getSample() { + if ( !retrievedSample ) { + mSample = super.getSample(); + retrievedSample = true; + } + return mSample; } - - public void setLane(String lane) { - this.setAttribute(LANE_TAG, lane); + + @Override + public void setSample(String s) { + super.setSample(s); + mSample = s; + retrievedSample = true; + } + + @Override + public String getPlatform() { + if ( !retrievedPlatform ) { + mPlatform = super.getPlatform(); + retrievedPlatform = true; + } + return mPlatform; + } + + @Override + public void setPlatform(String s) { + super.setPlatform(s); + mPlatform = s; + retrievedPlatform = true; + retrievedNGSPlatform = false; // recalculate the NGSPlatform } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 6c7a162f8..beadead0a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.recalibration.EventType; import java.util.Arrays; import java.util.HashMap; @@ -56,6 +56,12 @@ public class 
GATKSAMRecord extends BAMRecord { public static final String BQSR_BASE_INSERTION_QUALITIES = "BI"; // base qualities for insertions public static final String BQSR_BASE_DELETION_QUALITIES = "BD"; // base qualities for deletions + /** + * The default quality score for an insertion or deletion, if + * none are provided for this read. + */ + public static final byte DEFAULT_INSERTION_DELETION_QUAL = (byte)45; + // the SAMRecord data we're caching private String mReadString = null; private GATKSAMReadGroupRecord mReadGroup = null; @@ -141,16 +147,36 @@ public class GATKSAMRecord extends BAMRecord { mReadString = s; } + /** + * Get the GATKSAMReadGroupRecord of this read + * @return the GATKSAMReadGroupRecord of this read, or null if the read has no read group + */ @Override public GATKSAMReadGroupRecord getReadGroup() { - if ( !retrievedReadGroup ) { - SAMReadGroupRecord tempReadGroup = super.getReadGroup(); - mReadGroup = (tempReadGroup == null ? null : new GATKSAMReadGroupRecord(tempReadGroup)); + if ( ! retrievedReadGroup ) { + final SAMReadGroupRecord rg = super.getReadGroup(); + + // three cases: rg may be null (no rg), rg may already be a GATKSAMReadGroupRecord, or it may be + // a regular SAMReadGroupRecord in which case we have to make it a GATKSAMReadGroupRecord + if ( rg == null ) + mReadGroup = null; + else if ( rg instanceof GATKSAMReadGroupRecord ) + mReadGroup = (GATKSAMReadGroupRecord)rg; + else + mReadGroup = new GATKSAMReadGroupRecord(rg); + retrievedReadGroup = true; } return mReadGroup; } + public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) { + mReadGroup = readGroup; + retrievedReadGroup = true; + setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils! 
+ } + + @Override public int hashCode() { return super.hashCode(); @@ -229,7 +255,7 @@ public class GATKSAMRecord extends BAMRecord { byte [] quals = getExistingBaseInsertionQualities(); if( quals == null ) { quals = new byte[getBaseQualities().length]; - Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will + Arrays.fill(quals, DEFAULT_INSERTION_DELETION_QUAL); // Some day in the future when base insertion and base deletion quals exist the samtools API will // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 } return quals; @@ -245,7 +271,7 @@ public class GATKSAMRecord extends BAMRecord { byte[] quals = getExistingBaseDeletionQualities(); if( quals == null ) { quals = new byte[getBaseQualities().length]; - Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will + Arrays.fill(quals, DEFAULT_INSERTION_DELETION_QUAL); // Some day in the future when base insertion and base deletion quals exist the samtools API will // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 } return quals; @@ -259,12 +285,6 @@ public class GATKSAMRecord extends BAMRecord { return getReadGroup().getNGSPlatform(); } - public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) { - mReadGroup = readGroup; - retrievedReadGroup = true; - setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils! 
- } - /////////////////////////////////////////////////////////////////////////////// // *** ReduceReads functions ***// /////////////////////////////////////////////////////////////////////////////// @@ -397,9 +417,6 @@ public class GATKSAMRecord extends BAMRecord { else if (op != CigarOperator.HARD_CLIP) break; } - - if ( softStart < 1 ) - softStart = 1; } return softStart; } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index bd908727f..263cd9bd1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -226,7 +226,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default 454 tag */ - public static boolean is454Read(SAMRecord read) { + public static boolean is454Read(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.LS454; } @@ -236,7 +236,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default ion tag */ - public static boolean isIonRead(SAMRecord read) { + public static boolean isIonRead(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.ION_TORRENT; } @@ -246,7 +246,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default SOLiD tag */ - public static boolean isSOLiDRead(SAMRecord read) { + public static boolean isSOLiDRead(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.SOLID; } @@ -256,7 +256,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default SLX tag */ - public static boolean isIlluminaRead(SAMRecord read) { + public static boolean isIlluminaRead(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.ILLUMINA; } @@ -268,7 +268,7 @@ public class 
ReadUtils { * @param name the upper-cased platform name to test * @return whether or not name == PL tag in the read group of read */ - public static boolean isPlatformRead(SAMRecord read, String name) { + public static boolean isPlatformRead(GATKSAMRecord read, String name) { SAMReadGroupRecord readGroup = read.getReadGroup(); if (readGroup != null) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java new file mode 100755 index 000000000..4e394ace5 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.variant; + +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodecHeader; +import org.broad.tribble.readers.PositionalBufferedStream; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.*; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.*; + +/** + * A set of GATK-specific static utility methods for common operations on VCF files/records. + */ +public class GATKVCFUtils { + + /** + * Constructor access disallowed...static utility methods only! + */ + private GATKVCFUtils() { } + + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, List> rodBindings) { + // Collect the eval rod names + final Set names = new TreeSet(); + for ( final RodBinding evalRod : rodBindings ) + names.add(evalRod.getName()); + return getVCFHeadersFromRods(toolkit, names); + } + + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit) { + return getVCFHeadersFromRods(toolkit, (Collection)null); + } + + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection rodNames) { + Map data = new HashMap(); + + // iterate to get all of the sample names + List dataSources = toolkit.getRodDataSources(); + for ( ReferenceOrderedDataSource source : dataSources ) { + // ignore the rod if it's not in our list + if ( rodNames != null && !rodNames.contains(source.getName()) ) + continue; + + if ( source.getHeader() != null && source.getHeader() instanceof VCFHeader ) + data.put(source.getName(), (VCFHeader)source.getHeader()); + } + + return data; + } + + public static Map getVCFHeadersFromRodPrefix(GenomeAnalysisEngine 
toolkit,String prefix) { + Map data = new HashMap(); + + // iterate to get all of the sample names + List dataSources = toolkit.getRodDataSources(); + for ( ReferenceOrderedDataSource source : dataSources ) { + // ignore the rod if lacks the prefix + if ( ! source.getName().startsWith(prefix) ) + continue; + + if ( source.getHeader() != null && source.getHeader() instanceof VCFHeader ) + data.put(source.getName(), (VCFHeader)source.getHeader()); + } + + return data; + } + + /** + * Gets the header fields from all VCF rods input by the user + * + * @param toolkit GATK engine + * + * @return a set of all fields + */ + public static Set getHeaderFields(GenomeAnalysisEngine toolkit) { + return getHeaderFields(toolkit, null); + } + + /** + * Gets the header fields from all VCF rods input by the user + * + * @param toolkit GATK engine + * @param rodNames names of rods to use, or null if we should use all possible ones + * + * @return a set of all fields + */ + public static Set getHeaderFields(GenomeAnalysisEngine toolkit, Collection rodNames) { + + // keep a map of sample name to occurrences encountered + TreeSet fields = new TreeSet(); + + // iterate to get all of the sample names + List dataSources = toolkit.getRodDataSources(); + for ( ReferenceOrderedDataSource source : dataSources ) { + // ignore the rod if it's not in our list + if ( rodNames != null && !rodNames.contains(source.getName()) ) + continue; + + if ( source.getRecordType().equals(VariantContext.class)) { + VCFHeader header = (VCFHeader)source.getHeader(); + if ( header != null ) + fields.addAll(header.getMetaDataInSortedOrder()); + } + } + + return fields; + } + + /** + * Add / replace the contig header lines in the VCFHeader with the information in the GATK engine + * + * @param header the header to update + * @param engine the GATK engine containing command line arguments and the master sequence dictionary + */ + public static VCFHeader withUpdatedContigs(final VCFHeader header, final 
GenomeAnalysisEngine engine) { + return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); + } + + /** + * Read all of the VCF records from source into memory, returning the header and the VariantContexts + * + * @param source the file to read, must be in VCF4 format + * @return + * @throws java.io.IOException + */ + public static Pair> readVCF(final File source) throws IOException { + // read in the features + final List vcs = new ArrayList(); + final VCFCodec codec = new VCFCodec(); + PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); + FeatureCodecHeader header = codec.readHeader(pbs); + pbs.close(); + + pbs = new PositionalBufferedStream(new FileInputStream(source)); + pbs.skip(header.getHeaderEnd()); + + final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue(); + + while ( ! pbs.isDone() ) { + final VariantContext vc = codec.decode(pbs); + if ( vc != null ) + vcs.add(vc); + } + + return new Pair>(vcfHeader, vcs); + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java new file mode 100644 index 000000000..47f766d9b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -0,0 +1,450 @@ +package org.broadinstitute.sting.utils.variant; + +import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.variant.variantcontext.*; + +import java.util.*; + +public class GATKVariantContextUtils { + + public static final int DEFAULT_PLOIDY = 2; + public static final double SUM_GL_THRESH_NOCALL = 
-0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + + /** + * create a genome location, given a variant context + * @param genomeLocParser parser + * @param vc the variant context + * @return the genomeLoc + */ + public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) { + return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true); + } + + /** + * Returns true iff VC is an non-complex indel where every allele represents an expansion or + * contraction of a series of identical bases in the reference. + * + * For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT + * + * If VC = -/CT, then this function returns true because the CT insertion matches exactly the + * upcoming reference. + * If VC = -/CTA then this function returns false because the CTA isn't a perfect match + * + * Now consider deletions: + * + * If VC = CT/- then again the same logic applies and this returns true + * The case of CTA/- makes no sense because it doesn't actually match the reference bases. + * + * The logic of this function is pretty simple. Take all of the non-null alleles in VC. For + * each insertion allele of n bases, check if that allele matches the next n reference bases. + * For each deletion allele of n bases, check if this matches the reference bases at n - 2 n, + * as it must necessarily match the first n bases. If this test returns true for all + * alleles you are a tandem repeat, otherwise you are not. 
+ * + * @param vc + * @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference + * @return + */ + @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) + public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { + final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); + if ( ! vc.isIndel() ) // only indels are tandem repeats + return false; + + final Allele ref = vc.getReference(); + + for ( final Allele allele : vc.getAlternateAlleles() ) { + if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) ) + return false; + } + + // we've passed all of the tests, so we are a repeat + return true; + } + + /** + * + * @param vc + * @param refBasesStartingAtVCWithPad + * @return + */ + @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) + public static Pair,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { + final boolean VERBOSE = false; + final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); + if ( ! 
vc.isIndel() ) // only indels are tandem repeats + return null; + + final Allele refAllele = vc.getReference(); + final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); + + byte[] repeatUnit = null; + final ArrayList lengths = new ArrayList(); + + for ( final Allele allele : vc.getAlternateAlleles() ) { + Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); + + final int[] repetitionCount = result.first; + // repetition count = 0 means allele is not a tandem expansion of context + if (repetitionCount[0] == 0 || repetitionCount[1] == 0) + return null; + + if (lengths.size() == 0) { + lengths.add(repetitionCount[0]); // add ref allele length only once + } + lengths.add(repetitionCount[1]); // add this alt allele's length + + repeatUnit = result.second; + if (VERBOSE) { + System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); + System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); + System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); + System.out.println("RU:"+new String(repeatUnit)); + } + } + + return new Pair, byte[]>(lengths,repeatUnit); + } + + protected static Pair getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) { + /* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units. + Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)5. 
+ */ + + byte[] longB; + // find first repeat unit based on either ref or alt, whichever is longer + if (altBases.length > refBases.length) + longB = altBases; + else + longB = refBases; + + // see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units + // for example, -*,CACA needs to first be decomposed into (CA)2 + final int repeatUnitLength = findRepeatedSubstring(longB); + final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength); + + final int[] repetitionCount = new int[2]; +// repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext)); +// repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext)); + int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases); + repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext))-repetitionsInRef; + repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext))-repetitionsInRef; + + return new Pair(repetitionCount, repeatUnit); + + } + + /** + * Find out if a string can be represented as a tandem number of substrings. + * For example ACTACT is a 2-tandem of ACT, + * but ACTACA is not. 
+ * + * @param bases String to be tested + * @return Length of repeat unit, if string can be represented as tandem of substring (if it can't + * be represented as one, it will be just the length of the input string) + */ + public static int findRepeatedSubstring(byte[] bases) { + + int repLength; + for (repLength=1; repLength <=bases.length; repLength++) { + final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength); + boolean allBasesMatch = true; + for (int start = repLength; start < bases.length; start += repLength ) { + // check that remaining of string is exactly equal to repeat unit + final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length); + if (!Arrays.equals(candidateRepeatUnit, basePiece)) { + allBasesMatch = false; + break; + } + } + if (allBasesMatch) + return repLength; + } + + return repLength; + } + + /** + * Helper routine that finds number of repetitions a string consists of. + * For example, for string ATAT and repeat unit AT, number of repetitions = 2 + * @param repeatUnit Substring + * @param testString String to test + * @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's + */ + public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) { + int numRepeats = 0; + for (int start = 0; start < testString.length; start += repeatUnit.length) { + int end = start + repeatUnit.length; + byte[] unit = Arrays.copyOfRange(testString,start, end); + if(Arrays.equals(unit,repeatUnit)) + numRepeats++; + else + return numRepeats; + } + return numRepeats; + } + + /** + * Helper function for isTandemRepeat that checks that allele matches somewhere on the reference + * @param ref + * @param alt + * @param refBasesStartingAtVCWithoutPad + * @return + */ + protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) { + if ( ! 
Allele.oneIsPrefixOfOther(ref, alt) ) + return false; // we require one allele be a prefix of another + + if ( ref.length() > alt.length() ) { // we are a deletion + return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2); + } else { // we are an insertion + return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1); + } + } + + protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) { + final String potentialRepeat = l.substring(s.length()); // skip s bases + + for ( int i = 0; i < minNumberOfMatches; i++) { + final int start = i * potentialRepeat.length(); + final int end = (i+1) * potentialRepeat.length(); + if ( ref.length() < end ) + return false; // we ran out of bases to test + final String refSub = ref.substring(start, end); + if ( ! refSub.equals(potentialRepeat) ) + return false; // repeat didn't match, fail + } + + return true; // we passed all tests, we matched + } + + /** + * Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs + * + * @param vc variant context with genotype likelihoods + * @return genotypes context + */ + public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) { + return subsetDiploidAlleles(vc, vc.getAlleles(), true); + } + + /** + * Split variant context into its biallelic components if there are more than 2 alleles + * + * For VC has A/B/C alleles, returns A/B and A/C contexts. 
+ * Genotypes are all no-calls now (it's not possible to fix them easily) + * Alleles are right trimmed to satisfy VCF conventions + * + * If vc is biallelic or non-variant it is just returned + * + * Chromosome counts are updated (but they are by definition 0) + * + * @param vc a potentially multi-allelic variant context + * @return a list of bi-allelic (or monomorphic) variant context + */ + public static List splitVariantContextToBiallelics(final VariantContext vc) { + if ( ! vc.isVariant() || vc.isBiallelic() ) + // non variant or biallelics already satisfy the contract + return Collections.singletonList(vc); + else { + final List biallelics = new LinkedList(); + + for ( final Allele alt : vc.getAlternateAlleles() ) { + VariantContextBuilder builder = new VariantContextBuilder(vc); + final List alleles = Arrays.asList(vc.getReference(), alt); + builder.alleles(alleles); + builder.genotypes(subsetDiploidAlleles(vc, alleles, false)); + VariantContextUtils.calculateChromosomeCounts(builder, true); + biallelics.add(reverseTrimAlleles(builder.make())); + } + + return biallelics; + } + } + + /** + * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately) + * + * @param vc variant context with genotype likelihoods + * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC *** + * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs + * @return genotypes + */ + public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, + final List allelesToUse, + final boolean assignGenotypes) { + + // the genotypes with PLs + final GenotypesContext oldGTs = vc.getGenotypes(); + + // samples + final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); + + // the new genotypes to create + final GenotypesContext newGTs = GenotypesContext.create(); + + // we need to determine which of the alternate alleles 
(and hence the likelihoods) to use and carry forward + final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); + final int numNewAltAlleles = allelesToUse.size() - 1; + + // which PLs should be carried forward? + ArrayList likelihoodIndexesToUse = null; + + // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, + // then we can keep the PLs as is; otherwise, we determine which ones to keep + if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { + likelihoodIndexesToUse = new ArrayList(30); + + final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; + for ( int i = 0; i < numOriginalAltAlleles; i++ ) { + if ( allelesToUse.contains(vc.getAlternateAllele(i)) ) + altAlleleIndexToUse[i] = true; + } + + // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2 + final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY); + for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) { + final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); + // consider this entry only if both of the alleles are good + if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) ) + likelihoodIndexesToUse.add(PLindex); + } + } + + // create the new genotypes + for ( int k = 0; k < oldGTs.size(); k++ ) { + final Genotype g = oldGTs.get(sampleIndices.get(k)); + if ( !g.hasLikelihoods() ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + continue; + } + + // create the new likelihoods array from the alleles we are allowed to use + final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); + double[] newLikelihoods; + if ( likelihoodIndexesToUse == null ) { + newLikelihoods = originalLikelihoods; + } else { + newLikelihoods = new 
double[likelihoodIndexesToUse.size()]; + int newIndex = 0; + for ( int oldIndex : likelihoodIndexesToUse ) + newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; + + // might need to re-normalize + newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); + } + + // if there is no mass on the (new) likelihoods, then just no-call the sample + if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + } + else { + final GenotypeBuilder gb = new GenotypeBuilder(g); + + if ( numNewAltAlleles == 0 ) + gb.noPL(); + else + gb.PL(newLikelihoods); + + // if we weren't asked to assign a genotype, then just no-call the sample + if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { + gb.alleles(NO_CALL_ALLELES); + } + else { + // find the genotype with maximum likelihoods + int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); + GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); + + gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); + if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); + } + newGTs.add(gb.make()); + } + } + + return newGTs; + } + + public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { + + // see whether we need to trim common reference base from all alleles + final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); + if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) + return inputVC; + + final List alleles = new ArrayList(); + final GenotypesContext genotypes = GenotypesContext.create(); + final Map originalToTrimmedAlleleMap = new HashMap(); + + for (final Allele a : inputVC.getAlleles()) { + if (a.isSymbolic()) { + 
alleles.add(a); + originalToTrimmedAlleleMap.put(a, a); + } else { + // get bases for current allele and create a new one with trimmed bases + final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); + final Allele trimmedAllele = Allele.create(newBases, a.isReference()); + alleles.add(trimmedAllele); + originalToTrimmedAlleleMap.put(a, trimmedAllele); + } + } + + // now we can recreate new genotypes with trimmed alleles + for ( final Genotype genotype : inputVC.getGenotypes() ) { + final List originalAlleles = genotype.getAlleles(); + final List trimmedAlleles = new ArrayList(); + for ( final Allele a : originalAlleles ) { + if ( a.isCalled() ) + trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); + else + trimmedAlleles.add(Allele.NO_CALL); + } + genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); + } + + return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make(); + } + + public static int computeReverseClipping(final List unclippedAlleles, + final byte[] ref, + final int forwardClipping, + final boolean allowFullClip) { + int clipping = 0; + boolean stillClipping = true; + + while ( stillClipping ) { + for ( final Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) + continue; + + // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong + // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). + if ( a.length() - clipping == 0 ) + return clipping - (allowFullClip ? 
0 : 1); + + if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { + stillClipping = false; + } + else if ( ref.length == clipping ) { + if ( allowFullClip ) + stillClipping = false; + else + return -1; + } + else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { + stillClipping = false; + } + } + if ( stillClipping ) + clipping++; + } + + return clipping; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Codec.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java rename to public/java/src/org/broadinstitute/variant/bcf2/BCF2Codec.java index c221b8fba..b8bc1be6b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Codec.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; @@ -30,12 +30,11 @@ import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodecHeader; +import org.broad.tribble.TribbleException; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.vcf.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.ByteArrayInputStream; import java.io.FileInputStream; @@ -127,7 +126,7 @@ public final class BCF2Codec implements FeatureCodec { 
createLazyGenotypesDecoder(info, builder); return builder.fullyDecoded(true).make(); } catch ( IOException e ) { - throw new UserException.CouldNotReadInputFile("Failed to read BCF file", e); + throw new TribbleException("Failed to read BCF file", e); } } @@ -166,7 +165,7 @@ public final class BCF2Codec implements FeatureCodec { this.header = (VCFHeader)headerParser.readHeader(headerReader); bps.close(); } catch ( IOException e ) { - throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 header"); + throw new TribbleException("I/O error while reading BCF2 header"); } // create the config offsets @@ -271,8 +270,8 @@ public final class BCF2Codec implements FeatureCodec { final int nSamples = nFormatSamples & 0x00FFFFF; if ( header.getNGenotypeSamples() != nSamples ) - error("GATK currently doesn't support reading BCF2 files with " + - "different numbers of samples per record. Saw " + header.getNGenotypeSamples() + + error("Reading BCF2 files with different numbers of samples per record " + + "is not currently supported. 
Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples"); decodeID(builder); @@ -494,6 +493,6 @@ public final class BCF2Codec implements FeatureCodec { } private void error(final String message) throws RuntimeException { - throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos)); + throw new TribbleException(String.format("%s, at record %d with position %d:", message, recordNo, pos)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Decoder.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java rename to public/java/src/org/broadinstitute/variant/bcf2/BCF2Decoder.java index 05ba2aa1f..ca5975ab0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Decoder.java @@ -22,14 +22,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broad.tribble.FeatureCodec; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broad.tribble.TribbleException; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -69,7 +68,7 @@ public final class BCF2Decoder { * @return */ public void readNextBlock(final int blockSizeInBytes, final InputStream stream) { - if ( blockSizeInBytes < 0 ) throw new UserException.MalformedBCF2("Invalid block size " + blockSizeInBytes); + if ( blockSizeInBytes < 0 ) throw new TribbleException("Invalid block size " + blockSizeInBytes); setRecordBytes(readRecordBytes(blockSizeInBytes, stream)); } @@ -84,7 +83,7 @@ public final class BCF2Decoder { final int bytesRead = (int)stream.skip(blockSizeInBytes); validateReadBytes(bytesRead, 1, blockSizeInBytes); } catch ( IOException e ) { - throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e); + throw new TribbleException("I/O error while reading BCF2 file", e); } this.recordBytes = null; this.recordStream = null; @@ -175,7 +174,7 @@ public final class BCF2Decoder { case INT32: return value; case FLOAT: return rawFloatToFloat(value); case CHAR: return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased - default: throw new ReviewedStingException("BCF2 codec doesn't know how to decode type " + type ); + default: throw new TribbleException("BCF2 codec doesn't know how to decode type " + type ); } } } @@ -205,7 +204,7 @@ public final class BCF2Decoder { return BCF2Utils.isCollapsedString(s) ? 
BCF2Utils.explodeStringList(s) : s; } } catch ( IOException e ) { - throw new ReviewedStingException("readByte failure", e); + throw new TribbleException("readByte failure", e); } } @@ -348,7 +347,7 @@ public final class BCF2Decoder { validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes); } catch ( IOException e ) { - throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e); + throw new TribbleException("I/O error while reading BCF2 file", e); } return record; @@ -365,7 +364,7 @@ public final class BCF2Decoder { assert expected >= 0; if ( actuallyRead < expected ) { - throw new UserException.MalformedBCF2( + throw new TribbleException( String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations", expected, actuallyRead, nReadAttempts)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/variant/bcf2/BCF2GenotypeFieldDecoders.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java rename to public/java/src/org/broadinstitute/variant/bcf2/BCF2GenotypeFieldDecoders.java index e4ae96262..9355d56de 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java +++ b/public/java/src/org/broadinstitute/variant/bcf2/BCF2GenotypeFieldDecoders.java @@ -22,15 +22,15 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; import java.io.IOException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/variant/bcf2/BCF2LazyGenotypesDecoder.java similarity index 89% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java rename to public/java/src/org/broadinstitute/variant/bcf2/BCF2LazyGenotypesDecoder.java index 46b1fa6c1..c2d893b7f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java +++ b/public/java/src/org/broadinstitute/variant/bcf2/BCF2LazyGenotypesDecoder.java @@ -22,13 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broad.tribble.TribbleException; +import org.broadinstitute.variant.variantcontext.*; import java.io.IOException; import java.util.*; @@ -85,7 +84,7 @@ public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser try { fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); } catch ( ClassCastException e ) { - throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field + throw new TribbleException("BUG: expected encoding of field " + field + " inconsistent with the value observed in the decoded value"); } } @@ -96,7 +95,7 @@ public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); } catch ( IOException e ) { - throw new ReviewedStingException("Unexpected IOException parsing already read genotypes data block", e); + throw new TribbleException("Unexpected IOException parsing already read genotypes data block", e); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Type.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java rename to public/java/src/org/broadinstitute/variant/bcf2/BCF2Type.java index 1162a5d1e..b24f2a582 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java +++ b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Type.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE 
SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; import com.google.java.contract.Requires; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Utils.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java rename to public/java/src/org/broadinstitute/variant/bcf2/BCF2Utils.java index 2ac916db1..30d6b4ee4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/variant/bcf2/BCF2Utils.java @@ -22,12 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broad.tribble.TribbleException; +import org.broadinstitute.variant.vcf.*; import java.io.*; import java.util.*; @@ -214,7 +214,7 @@ public final class BCF2Utils { return potentialType; } - throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value); + throw new TribbleException("Integer cannot be encoded in allowable range of even INT32: " + value); } @Ensures("result.isIntegerType()") @@ -249,7 +249,7 @@ public final class BCF2Utils { case INT8: return t2; case INT16: return t2 == BCF2Type.INT32 ? 
t2 : t1; case INT32: return t1; - default: throw new ReviewedStingException("BUG: unexpected BCF2Type " + t1); + default: throw new TribbleException("BUG: unexpected BCF2Type " + t1); } } @@ -262,7 +262,7 @@ public final class BCF2Utils { case INT8: break; case INT16: maxType = BCF2Type.INT16; break; case INT32: return BCF2Type.INT32; // fast path for largest possible value - default: throw new ReviewedStingException("Unexpected integer type " + type1 ); + default: throw new TribbleException("Unexpected integer type " + type1 ); } } return maxType; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java b/public/java/src/org/broadinstitute/variant/bcf2/BCFVersion.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java rename to public/java/src/org/broadinstitute/variant/bcf2/BCFVersion.java index 742da7c0c..5e1915c22 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCFVersion.java +++ b/public/java/src/org/broadinstitute/variant/bcf2/BCFVersion.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; import java.io.IOException; import java.io.InputStream; diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/utils/BaseUtils.java rename to public/java/src/org/broadinstitute/variant/utils/BaseUtils.java index 53a49d8b2..4786622b0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java @@ -1,10 +1,9 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.variant.utils; import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.UserException; import 
java.util.Arrays; +import java.util.Random; /** * BaseUtils contains some basic utilities for manipulating nucleotides. @@ -47,6 +46,9 @@ public class BaseUtils { public static final int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G'); public static final int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T'); + // Use a fixed random seed to allow for deterministic results when using random bases + private static final Random randomNumberGen = new Random(47382911L); + /// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or // a pyrimidine to another pyrimidine nucleotide (C <-> T). // Approximately two out of every three single nucleotide polymorphisms (SNPs) are transitions. @@ -185,7 +187,7 @@ public class BaseUtils { */ static public int simpleBaseToBaseIndex(final byte base) { if ( base < 0 || base >= 256 ) - throw new UserException.BadInput("Non-standard bases were encountered in either the input reference or BAM file(s)"); + throw new IllegalArgumentException("Non-standard bases were encountered in either the input reference or BAM file(s)"); return baseIndexMap[base]; } @@ -425,7 +427,7 @@ public class BaseUtils { int randomBaseIndex = excludeBaseIndex; while (randomBaseIndex == excludeBaseIndex) { - randomBaseIndex = GenomeAnalysisEngine.getRandomGenerator().nextInt(4); + randomBaseIndex = randomNumberGen.nextInt(4); } return randomBaseIndex; diff --git a/public/java/src/org/broadinstitute/variant/utils/Utils.java b/public/java/src/org/broadinstitute/variant/utils/Utils.java new file mode 100644 index 000000000..1272429cb --- /dev/null +++ b/public/java/src/org/broadinstitute/variant/utils/Utils.java @@ -0,0 +1,130 @@ +package org.broadinstitute.variant.utils; + +import java.util.Collection; +import java.util.Iterator; + +public class Utils { + + /** + * The smallest log10 value we'll emit from normalizeFromLog10 and other functions + * where the real-space value is 0.0. 
+ */ + public final static double LOG10_P_OF_ZERO = -1000000.0; + + /** + * Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of + * elti objects (note there's no actual space between sep and the elti elements). Returns + * "" if collection is empty. If collection contains just elt, then returns elt.toString() + * + * @param separator the string to use to separate objects + * @param objects a collection of objects. the element order is defined by the iterator over objects + * @param the type of the objects + * @return a non-null string + */ + public static String join(final String separator, final Collection objects) { + if (objects.isEmpty()) { // fast path for empty collection + return ""; + } else { + final Iterator iter = objects.iterator(); + final T first = iter.next(); + + if ( ! iter.hasNext() ) // fast path for singleton collections + return first.toString(); + else { // full path for 2+ collection that actually need a join + final StringBuilder ret = new StringBuilder(first.toString()); + while(iter.hasNext()) { + ret.append(separator); + ret.append(iter.next().toString()); + } + return ret.toString(); + } + } + } + + + /** + * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE). + * + * @param array the array to be normalized + * @return a newly allocated array corresponding the normalized values in array + */ + public static double[] normalizeFromLog10(double[] array) { + return normalizeFromLog10(array, false); + } + + /** + * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE). 
+ * + * @param array the array to be normalized + * @param takeLog10OfOutput if true, the output will be transformed back into log10 units + * @return a newly allocated array corresponding the normalized values in array, maybe log10 transformed + */ + public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput) { + return normalizeFromLog10(array, takeLog10OfOutput, false); + } + + /** + * See #normalizeFromLog10 but with the additional option to use an approximation that keeps the calculation always in log-space + * + * @param array + * @param takeLog10OfOutput + * @param keepInLogSpace + * + * @return + */ + public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput, boolean keepInLogSpace) { + // for precision purposes, we need to add (or really subtract, since they're + // all negative) the largest value; also, we need to convert to normal-space. + double maxValue = arrayMax(array); + + // we may decide to just normalize in log space without converting to linear space + if (keepInLogSpace) { + for (int i = 0; i < array.length; i++) { + array[i] -= maxValue; + } + return array; + } + + // default case: go to linear space + double[] normalized = new double[array.length]; + + for (int i = 0; i < array.length; i++) + normalized[i] = Math.pow(10, array[i] - maxValue); + + // normalize + double sum = 0.0; + for (int i = 0; i < array.length; i++) + sum += normalized[i]; + for (int i = 0; i < array.length; i++) { + double x = normalized[i] / sum; + if (takeLog10OfOutput) { + x = Math.log10(x); + if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) ) + x = array[i] - maxValue; + } + + normalized[i] = x; + } + + return normalized; + } + + public static double arrayMax(final double[] array) { + return array[maxElementIndex(array, array.length)]; + } + + public static int maxElementIndex(final double[] array, final int endIndex) { + if (array == null || array.length == 0) + throw new IllegalArgumentException("Array cannot be 
null!"); + + int maxI = 0; + for (int i = 1; i < endIndex; i++) { + if (array[i] > array[maxI]) + maxI = i; + } + + return maxI; + } +} + + diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/variant/variantcontext/Allele.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java rename to public/java/src/org/broadinstitute/variant/variantcontext/Allele.java index 85c925204..b231019b8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/Allele.java @@ -1,6 +1,6 @@ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import java.util.Arrays; import java.util.Collection; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/variant/variantcontext/CommonInfo.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java rename to public/java/src/org/broadinstitute/variant/variantcontext/CommonInfo.java index 127f91677..3cc54384f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/CommonInfo.java @@ -1,7 +1,7 @@ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java b/public/java/src/org/broadinstitute/variant/variantcontext/FastGenotype.java similarity index 98% rename from 
public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java rename to public/java/src/org/broadinstitute/variant/variantcontext/FastGenotype.java index 4a7df9da4..8d8a8bbfb 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/FastGenotype.java @@ -22,11 +22,10 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/variant/variantcontext/Genotype.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java rename to public/java/src/org/broadinstitute/variant/variantcontext/Genotype.java index 67e80cf3c..4f83fa7be 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/Genotype.java @@ -1,12 +1,11 @@ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.vcf.VCFConstants; import java.util.*; @@ -209,7 +208,7 @@ public abstract class Genotype implements Comparable { } if ( observedAllele == null ) - throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null"); + throw new IllegalStateException("BUG: there are no 
alleles present in this genotype but the alleles list is not null"); return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypeBuilder.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java rename to public/java/src/org/broadinstitute/variant/variantcontext/GenotypeBuilder.java index 8fd792d3b..b8af1a305 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypeBuilder.java @@ -22,14 +22,14 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import com.google.java.contract.Requires; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFConstants; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypeLikelihoods.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java rename to public/java/src/org/broadinstitute/variant/variantcontext/GenotypeLikelihoods.java index 641eb5449..287105dde 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypeLikelihoods.java @@ -22,15 +22,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.variant.utils.Utils; +import org.broadinstitute.variant.vcf.VCFConstants; import java.util.Arrays; import java.util.EnumMap; @@ -156,7 +154,7 @@ public class GenotypeLikelihoods { //Returns null in case of missing likelihoods public EnumMap getAsMap(boolean normalizeFromLog10){ //Make sure that the log10likelihoods are set - double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector(); + double[] likelihoods = normalizeFromLog10 ? Utils.normalizeFromLog10(getAsVector()) : getAsVector(); if(likelihoods == null) return null; EnumMap likelihoodsMap = new EnumMap(GenotypeType.class); @@ -216,7 +214,7 @@ public class GenotypeLikelihoods { if (qual < 0) { // QUAL can be negative if the chosen genotype is not the most likely one individually. 
// In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen one - double[] normalized = MathUtils.normalizeFromLog10(likelihoods); + double[] normalized = Utils.normalizeFromLog10(likelihoods); double chosenGenotype = normalized[iOfChoosenGenotype]; return Math.log10(1.0 - chosenGenotype); } else { @@ -234,7 +232,7 @@ public class GenotypeLikelihoods { likelihoodsAsVector[i] = Integer.parseInt(strings[i]) / -10.0; } } catch (NumberFormatException e) { - throw new UserException.MalformedVCF("The GL/PL tag contains non-integer values: " + likelihoodsAsString_PLs); + throw new TribbleException("The GL/PL tag contains non-integer values: " + likelihoodsAsString_PLs); } return likelihoodsAsVector; } else @@ -335,7 +333,7 @@ public class GenotypeLikelihoods { // a bit of sanity checking for ( int i = 0; i < cache.length; i++ ) { if ( cache[i] == null ) - throw new ReviewedStingException("BUG: cache entry " + i + " is unexpected null"); + throw new IllegalStateException("BUG: cache entry " + i + " is unexpected null"); } return cache; @@ -422,7 +420,7 @@ public class GenotypeLikelihoods { public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) { // make sure that we've cached enough data if ( PLindex >= PLIndexToAlleleIndex.length ) - throw new ReviewedStingException("GATK limitation: cannot genotype more than " + MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED + " alleles"); + throw new IllegalStateException("Internal limitation: cannot genotype more than " + MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED + " alleles"); return PLIndexToAlleleIndex[PLindex]; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeType.java b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypeType.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeType.java rename to 
public/java/src/org/broadinstitute/variant/variantcontext/GenotypeType.java index 1e3f43065..53798015e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeType.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypeType.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; /** * Summary types for Genotype objects diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypesContext.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java rename to public/java/src/org/broadinstitute/variant/variantcontext/GenotypesContext.java index f306bac4d..d7239b2f3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/GenotypesContext.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/LazyGenotypesContext.java b/public/java/src/org/broadinstitute/variant/variantcontext/LazyGenotypesContext.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/LazyGenotypesContext.java rename to public/java/src/org/broadinstitute/variant/variantcontext/LazyGenotypesContext.java index 1f73ecabd..8ecca17ca 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/LazyGenotypesContext.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/LazyGenotypesContext.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java rename to public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java index 12f9cb20c..f0588f840 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java @@ -1,12 +1,10 @@ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.variant.vcf.*; import java.util.*; @@ -1156,7 +1154,7 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( WARN_ABOUT_BAD_END ) logger.warn(message); else - throw new ReviewedStingException(message); + throw new TribbleException(message); } } else { final long length = (stop - start) + 1; @@ -1387,7 +1385,7 @@ public class VariantContext implements Feature { // to enable tribble integratio final int obsSize = decoded instanceof List ? 
((List) decoded).size() : 1; final int expSize = format.getCount(this); if ( obsSize != expSize ) { - throw new UserException.MalformedVCFHeader("Discordant field size detected for field " + + throw new TribbleException.InvalidHeader("Discordant field size detected for field " + field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " + "but the header says this should have " + expSize + " values based on header record " + format); @@ -1437,17 +1435,17 @@ public class VariantContext implements Feature { // to enable tribble integratio case Flag: final boolean b = Boolean.valueOf(string) || string.equals("1"); if ( b == false ) - throw new UserException.MalformedVCF("VariantContext FLAG fields " + field + " cannot contain false values" + throw new TribbleException("VariantContext FLAG fields " + field + " cannot contain false values" + " as seen at " + getChr() + ":" + getStart()); return b; case String: return string; case Integer: return Integer.valueOf(string); case Float: return Double.valueOf(string); - default: throw new ReviewedStingException("Unexpected type for field" + field); + default: throw new TribbleException("Unexpected type for field" + field); } } } catch (NumberFormatException e) { - throw new UserException.MalformedVCF("Could not decode field " + field + " with value " + string + " of declared type " + format.getType()); + throw new TribbleException("Could not decode field " + field + " with value " + string + " of declared type " + format.getType()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextBuilder.java similarity index 93% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java rename to public/java/src/org/broadinstitute/variant/variantcontext/VariantContextBuilder.java index 40ac089ef..bd12b8be5 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextBuilder.java @@ -22,12 +22,10 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.vcf.VCFConstants; import java.util.*; @@ -105,7 +103,7 @@ public class VariantContextBuilder { * @param parent Cannot be null */ public VariantContextBuilder(VariantContext parent) { - if ( parent == null ) throw new ReviewedStingException("BUG: VariantContextBuilder parent argument cannot be null in VariantContextBuilder"); + if ( parent == null ) throw new IllegalArgumentException("BUG: VariantContextBuilder parent argument cannot be null in VariantContextBuilder"); this.alleles = parent.alleles; this.attributes = parent.getAttributes(); this.attributesCanBeModified = false; @@ -121,7 +119,7 @@ public class VariantContextBuilder { } public VariantContextBuilder(VariantContextBuilder parent) { - if ( parent == null ) throw new ReviewedStingException("BUG: VariantContext parent argument cannot be null in VariantContextBuilder"); + if ( parent == null ) throw new IllegalArgumentException("BUG: VariantContext parent argument cannot be null in VariantContextBuilder"); this.alleles = parent.alleles; this.attributesCanBeModified = false; this.contig = parent.contig; @@ -384,20 +382,6 @@ public class VariantContextBuilder { return this; } - /** - * Tells us that the resulting VariantContext should have the specified location - * @param loc - * @return - */ - @Requires({"loc.getContig() != null", "loc.getStart() >= 0", "loc.getStop() >= 0"}) - public VariantContextBuilder 
loc(final GenomeLoc loc) { - this.contig = loc.getContig(); - this.start = loc.getStart(); - this.stop = loc.getStop(); - toValidate.add(VariantContext.Validation.ALLELES); - return this; - } - /** * Tells us that the resulting VariantContext should have the specified contig chr * @param contig @@ -442,8 +426,6 @@ public class VariantContextBuilder { /** * Compute the end position for this VariantContext from the alleles themselves * - * @see VariantContextUtils.computeEndFromAlleles() - * * assigns this builder the stop position computed. * * @param alleles the list of alleles to consider. The reference allele must be the first one diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java similarity index 66% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java rename to public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java index 8b360eb5e..5d9a6d476 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java @@ -21,21 +21,18 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.commons.jexl2.Expression; import org.apache.commons.jexl2.JexlEngine; -import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; +import org.broad.tribble.TribbleException; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.utils.Utils; +import org.broadinstitute.variant.vcf.*; import java.io.Serializable; import java.util.*; @@ -50,7 +47,6 @@ public class VariantContextUtils { private static Set MISSING_KEYS_WARNED_ABOUT = new HashSet(); final public static JexlEngine engine = new JexlEngine(); - public static final int DEFAULT_PLOIDY = 2; private final static boolean ASSUME_MISSING_FIELDS_ARE_STRINGS = false; static { @@ -173,7 +169,7 @@ public class VariantContextUtils { return new VCFInfoHeaderLine(field, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Auto-generated string header for " + field); } else - throw new UserException.MalformedVCF("Fully decoding VariantContext requires header line for all fields, but none was found for " + field); + throw new TribbleException("Fully decoding VariantContext requires header line for all fields, but none was found for " + field); } return metaData; } @@ -208,10 +204,10 @@ public class VariantContextUtils { */ public static List initializeMatchExps(String[] names, String[] exps) { if ( names == null || exps == null ) - throw new 
ReviewedStingException("BUG: neither names nor exps can be null: names " + Arrays.toString(names) + " exps=" + Arrays.toString(exps) ); + throw new IllegalArgumentException("BUG: neither names nor exps can be null: names " + Arrays.toString(names) + " exps=" + Arrays.toString(exps) ); if ( names.length != exps.length ) - throw new UserException("Inconsistent number of provided filter names and expressions: names=" + Arrays.toString(names) + " exps=" + Arrays.toString(exps)); + throw new IllegalArgumentException("Inconsistent number of provided filter names and expressions: names=" + Arrays.toString(names) + " exps=" + Arrays.toString(exps)); Map map = new HashMap(); for ( int i = 0; i < names.length; i++ ) { map.put(names[i], exps[i]); } @@ -247,7 +243,7 @@ public class VariantContextUtils { Expression exp = engine.createExpression(expStr); exps.add(new JexlVCMatchExp(name, exp)); } catch (Exception e) { - throw new UserException.BadArgumentValue(name, "Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax.") ; + throw new IllegalArgumentException("Argument " + name + "has a bad value. Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax.") ; } } @@ -409,7 +405,6 @@ public class VariantContextUtils { KEEP_UNCONDITIONAL } - @Hidden public enum MultipleAllelesMergeType { /** * Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record. 
@@ -426,7 +421,6 @@ public class VariantContextUtils { * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with * the sample name * - * @param genomeLocParser loc parser * @param unsortedVCs collection of unsorted VCs * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs * @param filteredRecordMergeType merge type for filtered records @@ -438,8 +432,7 @@ public class VariantContextUtils { * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? * @return new VariantContext representing the merge of unsortedVCs */ - public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser, - final Collection unsortedVCs, + public static VariantContext simpleMerge(final Collection unsortedVCs, final List priorityListOfVCs, final FilteredRecordMergeType filteredRecordMergeType, final GenotypeMergeType genotypeMergeOptions, @@ -449,7 +442,7 @@ public class VariantContextUtils { final boolean filteredAreUncalled, final boolean mergeInfoWithMaxAC ) { int originalNumOfVCs = priorityListOfVCs == null ? 
0 : priorityListOfVCs.size(); - return simpleMerge(genomeLocParser,unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC); + return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC); } /** @@ -457,7 +450,6 @@ public class VariantContextUtils { * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with * the sample name * - * @param genomeLocParser loc parser * @param unsortedVCs collection of unsorted VCs * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs * @param filteredRecordMergeType merge type for filtered records @@ -469,8 +461,7 @@ public class VariantContextUtils { * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? 
* @return new VariantContext representing the merge of unsortedVCs */ - public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser, - final Collection unsortedVCs, + public static VariantContext simpleMerge(final Collection unsortedVCs, final List priorityListOfVCs, final int originalNumOfVCs, final FilteredRecordMergeType filteredRecordMergeType, @@ -517,7 +508,7 @@ public class VariantContextUtils { final Set variantSources = new HashSet(); // contains the set of sources we found in our set of VCs that are variant final Set rsIDs = new LinkedHashSet(1); // most of the time there's one id - GenomeLoc loc = getLocation(genomeLocParser,first); + VariantContext longestVC = first; int depth = 0; int maxAC = -1; final Map attributesWithMaxAC = new LinkedHashMap(); @@ -533,11 +524,11 @@ public class VariantContextUtils { // cycle through and add info from the other VCs, making sure the loc/reference matches for ( final VariantContext vc : VCs ) { - if ( loc.getStart() != vc.getStart() ) - throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); + if ( longestVC.getStart() != vc.getStart() ) + throw new IllegalStateException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); - if ( getLocation(genomeLocParser,vc).size() > loc.size() ) - loc = getLocation(genomeLocParser,vc); // get the longest location + if ( getSize(vc) > getSize(longestVC) ) + longestVC = vc; // get the longest location nFiltered += vc.isFiltered() ? 1 : 0; if ( vc.isVariant() ) variantSources.add(vc.getSource()); @@ -612,8 +603,8 @@ public class VariantContextUtils { continue; if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) { if ( ! genotypes.isEmpty() ) - logger.debug(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. 
single=%s", - genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); + logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s", + vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.alleles)); genotypes = stripPLsAndAD(genotypes); // this will remove stale AC,AF attributed from vc calculateChromosomeCounts(vc, attributes, true); @@ -661,7 +652,7 @@ public class VariantContextUtils { final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs); final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID); - builder.loc(loc.getContig(), loc.getStart(), loc.getStop()); + builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd()); builder.alleles(alleles); builder.genotypes(genotypes); builder.log10PError(log10PError); @@ -788,7 +779,7 @@ public class VariantContextUtils { for ( String name : vc.getSampleNames() ) { //System.out.printf("Checking %s %b%n", name, names.contains(name)); if ( names.contains(name) ) - throw new UserException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered " + name); + throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered " + name); } names.addAll(vc.getSampleNames()); @@ -804,7 +795,7 @@ public class VariantContextUtils { if ( ref == null || ref.length() < myRef.length() ) ref = myRef; else if ( ref.length() == myRef.length() && ! ref.equals(myRef) ) - throw new UserException.BadInput(String.format("The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s", vc.getChr(), vc.getStart(), ref, myRef)); + throw new TribbleException(String.format("The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. 
%s", vc.getChr(), vc.getStart(), ref, myRef)); } return ref; @@ -826,7 +817,7 @@ public class VariantContextUtils { // Allele myRef = vc.getReference(); - if ( refAllele.length() <= myRef.length() ) throw new ReviewedStingException("BUG: myRef="+myRef+" is longer than refAllele="+refAllele); + if ( refAllele.length() <= myRef.length() ) throw new IllegalStateException("BUG: myRef="+myRef+" is longer than refAllele="+refAllele); byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length()); // System.out.printf("Remapping allele at %s%n", vc); @@ -863,7 +854,7 @@ public class VariantContextUtils { private int getIndex(VariantContext vc) { int i = priorityListOfVCs.indexOf(vc.getSource()); - if ( i == -1 ) throw new UserException.BadArgumentValue(Utils.join(",", priorityListOfVCs), "Priority list " + priorityListOfVCs + " doesn't contain variant context " + vc.getSource()); + if ( i == -1 ) throw new IllegalArgumentException("Priority list " + priorityListOfVCs + " doesn't contain variant context " + vc.getSource()); return i; } @@ -985,14 +976,8 @@ public class VariantContextUtils { return BaseUtils.SNPSubstitutionType(ref.getBases()[0], alt.getBases()[0]) == BaseUtils.BaseSubstitutionType.TRANSVERSION; } - /** - * create a genome location, given a variant context - * @param genomeLocParser parser - * @param vc the variant context - * @return the genomeLoc - */ - public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) { - return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true); + public static int getSize( VariantContext vc ) { + return vc.getEnd() - vc.getStart() + 1; } public static final Set genotypeNames(final Collection genotypes) { @@ -1002,354 +987,6 @@ public class VariantContextUtils { return names; } - /** - * Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs - * - * @param vc variant context with genotype likelihoods - 
* @return genotypes context - */ - public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) { - return subsetDiploidAlleles(vc, vc.getAlleles(), true); - } - - private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); - public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - - /** - * Split variant context into its biallelic components if there are more than 2 alleles - * - * For VC has A/B/C alleles, returns A/B and A/C contexts. - * Genotypes are all no-calls now (it's not possible to fix them easily) - * Alleles are right trimmed to satisfy VCF conventions - * - * If vc is biallelic or non-variant it is just returned - * - * Chromosome counts are updated (but they are by definition 0) - * - * @param vc a potentially multi-allelic variant context - * @return a list of bi-allelic (or monomorphic) variant context - */ - public static List splitVariantContextToBiallelics(final VariantContext vc) { - if ( ! 
vc.isVariant() || vc.isBiallelic() ) - // non variant or biallelics already satisfy the contract - return Collections.singletonList(vc); - else { - final List biallelics = new LinkedList(); - - for ( final Allele alt : vc.getAlternateAlleles() ) { - VariantContextBuilder builder = new VariantContextBuilder(vc); - final List alleles = Arrays.asList(vc.getReference(), alt); - builder.alleles(alleles); - builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); - calculateChromosomeCounts(builder, true); - biallelics.add(reverseTrimAlleles(builder.make())); - } - - return biallelics; - } - } - - /** - * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately) - * - * @param vc variant context with genotype likelihoods - * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC *** - * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs - * @return genotypes - */ - public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, - final List allelesToUse, - final boolean assignGenotypes) { - - // the genotypes with PLs - final GenotypesContext oldGTs = vc.getGenotypes(); - - // samples - final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); - - // the new genotypes to create - final GenotypesContext newGTs = GenotypesContext.create(); - - // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward - final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); - final int numNewAltAlleles = allelesToUse.size() - 1; - - // which PLs should be carried forward? 
- ArrayList likelihoodIndexesToUse = null; - - // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, - // then we can keep the PLs as is; otherwise, we determine which ones to keep - if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { - likelihoodIndexesToUse = new ArrayList(30); - - final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; - for ( int i = 0; i < numOriginalAltAlleles; i++ ) { - if ( allelesToUse.contains(vc.getAlternateAllele(i)) ) - altAlleleIndexToUse[i] = true; - } - - // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2 - final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY); - for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) { - final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); - // consider this entry only if both of the alleles are good - if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) ) - likelihoodIndexesToUse.add(PLindex); - } - } - - // create the new genotypes - for ( int k = 0; k < oldGTs.size(); k++ ) { - final Genotype g = oldGTs.get(sampleIndices.get(k)); - if ( !g.hasLikelihoods() ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); - continue; - } - - // create the new likelihoods array from the alleles we are allowed to use - final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); - double[] newLikelihoods; - if ( likelihoodIndexesToUse == null ) { - newLikelihoods = originalLikelihoods; - } else { - newLikelihoods = new double[likelihoodIndexesToUse.size()]; - int newIndex = 0; - for ( int oldIndex : likelihoodIndexesToUse ) - newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; - - // might need to re-normalize - newLikelihoods = 
MathUtils.normalizeFromLog10(newLikelihoods, false, true); - } - - // if there is no mass on the (new) likelihoods, then just no-call the sample - if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); - } - else { - final GenotypeBuilder gb = new GenotypeBuilder(g); - - if ( numNewAltAlleles == 0 ) - gb.noPL(); - else - gb.PL(newLikelihoods); - - // if we weren't asked to assign a genotype, then just no-call the sample - if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { - gb.alleles(NO_CALL_ALLELES); - } - else { - // find the genotype with maximum likelihoods - int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); - GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); - - gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); - if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); - } - newGTs.add(gb.make()); - } - } - - return newGTs; - } - - /** - * Returns true iff VC is an non-complex indel where every allele represents an expansion or - * contraction of a series of identical bases in the reference. - * - * For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT - * - * If VC = -/CT, then this function returns true because the CT insertion matches exactly the - * upcoming reference. - * If VC = -/CTA then this function returns false because the CTA isn't a perfect match - * - * Now consider deletions: - * - * If VC = CT/- then again the same logic applies and this returns true - * The case of CTA/- makes no sense because it doesn't actually match the reference bases. - * - * The logic of this function is pretty simple. Take all of the non-null alleles in VC. 
For - * each insertion allele of n bases, check if that allele matches the next n reference bases. - * For each deletion allele of n bases, check if this matches the reference bases at n - 2 n, - * as it must necessarily match the first n bases. If this test returns true for all - * alleles you are a tandem repeat, otherwise you are not. - * - * @param vc - * @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference - * @return - */ - @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) - public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { - final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); - if ( ! vc.isIndel() ) // only indels are tandem repeats - return false; - - final Allele ref = vc.getReference(); - - for ( final Allele allele : vc.getAlternateAlleles() ) { - if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) ) - return false; - } - - // we've passed all of the tests, so we are a repeat - return true; - } - - /** - * - * @param vc - * @param refBasesStartingAtVCWithPad - * @return - */ - @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) - public static Pair,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { - final boolean VERBOSE = false; - final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); - if ( ! 
vc.isIndel() ) // only indels are tandem repeats - return null; - - final Allele refAllele = vc.getReference(); - final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); - - byte[] repeatUnit = null; - final ArrayList lengths = new ArrayList(); - - for ( final Allele allele : vc.getAlternateAlleles() ) { - Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); - - final int[] repetitionCount = result.first; - // repetition count = 0 means allele is not a tandem expansion of context - if (repetitionCount[0] == 0 || repetitionCount[1] == 0) - return null; - - if (lengths.size() == 0) { - lengths.add(repetitionCount[0]); // add ref allele length only once - } - lengths.add(repetitionCount[1]); // add this alt allele's length - - repeatUnit = result.second; - if (VERBOSE) { - System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); - System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); - System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); - System.out.println("RU:"+new String(repeatUnit)); - } - } - - return new Pair, byte[]>(lengths,repeatUnit); - } - - protected static Pair getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) { - /* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units. - Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)5. 
- */ - - byte[] longB; - // find first repeat unit based on either ref or alt, whichever is longer - if (altBases.length > refBases.length) - longB = altBases; - else - longB = refBases; - - // see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units - // for example, -*,CACA needs to first be decomposed into (CA)2 - final int repeatUnitLength = findRepeatedSubstring(longB); - final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength); - - final int[] repetitionCount = new int[2]; -// repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext)); -// repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext)); - int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases); - repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext))-repetitionsInRef; - repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext))-repetitionsInRef; - - return new Pair(repetitionCount, repeatUnit); - - } - - /** - * Find out if a string can be represented as a tandem number of substrings. - * For example ACTACT is a 2-tandem of ACT, - * but ACTACA is not. 
- * - * @param bases String to be tested - * @return Length of repeat unit, if string can be represented as tandem of substring (if it can't - * be represented as one, it will be just the length of the input string) - */ - protected static int findRepeatedSubstring(byte[] bases) { - - int repLength; - for (repLength=1; repLength <=bases.length; repLength++) { - final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength); - boolean allBasesMatch = true; - for (int start = repLength; start < bases.length; start += repLength ) { - // check that remaining of string is exactly equal to repeat unit - final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length); - if (!Arrays.equals(candidateRepeatUnit, basePiece)) { - allBasesMatch = false; - break; - } - } - if (allBasesMatch) - return repLength; - } - - return repLength; - } - - /** - * Helper routine that finds number of repetitions a string consists of. - * For example, for string ATAT and repeat unit AT, number of repetitions = 2 - * @param repeatUnit Substring - * @param testString String to test - * @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's - */ - public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) { - int numRepeats = 0; - for (int start = 0; start < testString.length; start += repeatUnit.length) { - int end = start + repeatUnit.length; - byte[] unit = Arrays.copyOfRange(testString,start, end); - if(Arrays.equals(unit,repeatUnit)) - numRepeats++; - else - return numRepeats; - } - return numRepeats; - } - - /** - * Helper function for isTandemRepeat that checks that allele matches somewhere on the reference - * @param ref - * @param alt - * @param refBasesStartingAtVCWithoutPad - * @return - */ - protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) { - if ( ! 
Allele.oneIsPrefixOfOther(ref, alt) ) - return false; // we require one allele be a prefix of another - - if ( ref.length() > alt.length() ) { // we are a deletion - return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2); - } else { // we are an insertion - return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1); - } - } - - protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) { - final String potentialRepeat = l.substring(s.length()); // skip s bases - - for ( int i = 0; i < minNumberOfMatches; i++) { - final int start = i * potentialRepeat.length(); - final int end = (i+1) * potentialRepeat.length(); - if ( ref.length() < end ) - return false; // we ran out of bases to test - final String refSub = ref.substring(start, end); - if ( ! refSub.equals(potentialRepeat) ) - return false; // repeat didn't match, fail - } - - return true; // we passed all tests, we matched - } - /** * Compute the end position for this VariantContext from the alleles themselves * @@ -1368,11 +1005,11 @@ public class VariantContextUtils { final Allele ref = alleles.get(0); if ( ref.isNonReference() ) - throw new ReviewedStingException("computeEndFromAlleles requires first allele to be reference"); + throw new IllegalStateException("computeEndFromAlleles requires first allele to be reference"); if ( VariantContext.hasSymbolicAlleles(alleles) ) { if ( endForSymbolicAlleles == -1 ) - throw new ReviewedStingException("computeEndFromAlleles found a symbolic allele but endForSymbolicAlleles was provided"); + throw new IllegalStateException("computeEndFromAlleles found a symbolic allele but endForSymbolicAlleles was provided"); return endForSymbolicAlleles; } else { return start + Math.max(ref.length() - 1, 0); @@ -1408,80 +1045,4 @@ public class VariantContextUtils { } } - public static VariantContext reverseTrimAlleles( final 
VariantContext inputVC ) { - - // see whether we need to trim common reference base from all alleles - final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); - if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) - return inputVC; - - final List alleles = new ArrayList(); - final GenotypesContext genotypes = GenotypesContext.create(); - final Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); - final Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - final List originalAlleles = genotype.getAlleles(); - final List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - } - - return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make(); - } - - public static int computeReverseClipping(final List unclippedAlleles, - final byte[] ref, - final int forwardClipping, - final boolean allowFullClip) { - int clipping = 0; - boolean stillClipping = true; - - while ( stillClipping ) { - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) - continue; - - // we need to ensure that we don't reverse clip out all of the bases from an allele because we then 
will have the wrong - // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). - if ( a.length() - clipping == 0 ) - return clipping - (allowFullClip ? 0 : 1); - - if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { - stillClipping = false; - } - else if ( ref.length == clipping ) { - if ( allowFullClip ) - stillClipping = false; - else - return -1; - } - else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { - stillClipping = false; - } - } - if ( stillClipping ) - clipping++; - } - - return clipping; - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantJEXLContext.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java rename to public/java/src/org/broadinstitute/variant/variantcontext/VariantJEXLContext.java index abe85e383..96643b9e9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantJEXLContext.java @@ -21,13 +21,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import org.apache.commons.jexl2.JexlContext; import org.apache.commons.jexl2.MapContext; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.variant.utils.Utils; +import org.broadinstitute.variant.vcf.VCFConstants; import java.util.Collection; import java.util.HashMap; @@ -275,7 +274,7 @@ class JEXLMap implements Map { if (e.getMessage().contains("undefined variable")) jexl.put(exp,false); else - throw new UserException.CommandLineException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage())); + throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage())); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2Encoder.java similarity index 93% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2Encoder.java index 22acc4787..f71f4e577 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2Encoder.java @@ -22,13 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.bcf2.BCF2Type; +import org.broadinstitute.variant.bcf2.BCF2Utils; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -80,7 +79,7 @@ public final class BCF2Encoder { case INT32: encodeTypedInt((Integer)value, type); break; case FLOAT: encodeTypedFloat((Double) value); break; case CHAR: encodeTypedString((String) value); break; - default: throw new ReviewedStingException("Illegal type encountered " + type); + default: throw new IllegalArgumentException("Illegal type encountered " + type); } } } @@ -155,7 +154,7 @@ public final class BCF2Encoder { case INT32: encodeRawBytes((Integer) value, type); break; case FLOAT: encodeRawFloat((Double) value); break; case CHAR: encodeRawChar((Byte) value); break; - default: throw new ReviewedStingException("Illegal type encountered " + type); + default: throw new IllegalArgumentException("Illegal type encountered " + type); } } } catch ( ClassCastException e ) { @@ -238,7 +237,7 @@ public final class BCF2Encoder { */ @Requires("o != null") public final BCF2Type encode(final Object o) throws IOException { - if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values"); + if ( o == null ) throw new IllegalArgumentException("Generic encode cannot deal with null values"); if ( o instanceof List ) { final BCF2Type type = determineBCFType(((List) o).get(0)); @@ -262,7 +261,7 @@ public final class BCF2Encoder { else if ( toType instanceof Double ) return BCF2Type.FLOAT; else - throw new ReviewedStingException("No native encoding for Object of type " + 
arg.getClass().getSimpleName()); + throw new IllegalArgumentException("No native encoding for Object of type " + arg.getClass().getSimpleName()); } private final List stringToBytes(final String v) throws IOException { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldEncoder.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldEncoder.java index a91eb216d..5f3e66c2a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldEncoder.java @@ -22,17 +22,16 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.bcf2.BCF2Type; +import org.broadinstitute.variant.bcf2.BCF2Utils; +import org.broadinstitute.variant.vcf.VCFCompoundHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderLineCount; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.IOException; import java.util.Collections; @@ -85,7 +84,7 @@ public abstract class BCF2FieldEncoder { this.staticType = 
staticType; final Integer offset = dict.get(getField()); - if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + getField() + " in header as required by BCF"); + if ( offset == null ) throw new IllegalStateException("Format error: could not find string " + getField() + " in header as required by BCF"); this.dictionaryOffset = offset; dictionaryOffsetType = BCF2Utils.determineIntegerType(offset); } @@ -260,7 +259,7 @@ public abstract class BCF2FieldEncoder { @Requires("isDynamicallyTyped()") @Ensures("result != null") public BCF2Type getDynamicType(final Object value) { - throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field " + getField()); + throw new IllegalStateException("BUG: cannot get dynamic type for statically typed BCF2 field " + getField()); } // ---------------------------------------------------------------------- @@ -351,7 +350,7 @@ public abstract class BCF2FieldEncoder { public Flag(final VCFCompoundHeaderLine headerLine, final Map dict ) { super(headerLine, dict, BCF2Type.INT8); if ( ! 
headerLine.isFixedCount() || headerLine.getCount() != 0 ) - throw new ReviewedStingException("Flag encoder only suppports atomic flags for field " + getField()); + throw new IllegalStateException("Flag encoder only supports atomic flags for field " + getField()); } @Override diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldWriter.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldWriter.java index 61c0129bb..c75242e8b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldWriter.java @@ -22,17 +22,16 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.bcf2.BCF2Type; +import org.broadinstitute.variant.bcf2.BCF2Utils; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.IOException; import 
java.util.ArrayList; @@ -208,7 +207,7 @@ public abstract class BCF2FieldWriter { this.ige = ige; if ( ! (fieldEncoder instanceof BCF2FieldEncoder.IntArray) ) - throw new ReviewedStingException("BUG: IntGenotypesWriter requires IntArray encoder for field " + getField()); + throw new IllegalArgumentException("BUG: IntGenotypesWriter requires IntArray encoder for field " + getField()); } @Override @@ -266,7 +265,7 @@ public abstract class BCF2FieldWriter { @Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES ) - throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " + + throw new IllegalStateException("Current BCF2 encoder cannot handle sites " + "with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have " + vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart()); @@ -312,7 +311,7 @@ public abstract class BCF2FieldWriter { else if ( a == Allele.NO_CALL ) return -1; else { final Integer o = alleleMapForTriPlus.get(a); - if ( o == null ) throw new ReviewedStingException("BUG: Couldn't find allele offset for allele " + a); + if ( o == null ) throw new IllegalStateException("BUG: Couldn't find allele offset for allele " + a); return o; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldWriterManager.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldWriterManager.java index 9c63a69e7..e8d58f2f2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2FieldWriterManager.java @@ -22,13 +22,12 @@ * 
OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.vcf.*; import java.util.HashMap; import java.util.Map; @@ -74,7 +73,7 @@ public class BCF2FieldWriterManager { @Ensures("map.containsKey(field)") private final void add(final Map map, final String field, final T writer) { if ( map.containsKey(field) ) - throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders"); + throw new IllegalStateException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders"); map.put(field, writer); } @@ -118,7 +117,7 @@ public class BCF2FieldWriterManager { else return new BCF2FieldEncoder.GenericInts(line, dict); default: - throw new ReviewedStingException("Unexpected type for field " + line.getID()); + throw new IllegalArgumentException("Unexpected type for field " + line.getID()); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2Writer.java similarity index 91% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2Writer.java index 536f07f90..3e37ade25 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/BCF2Writer.java @@ -22,23 +22,21 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; -import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFContigHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.bcf2.BCF2Codec; +import org.broadinstitute.variant.bcf2.BCF2Type; +import org.broadinstitute.variant.bcf2.BCF2Utils; +import org.broadinstitute.variant.bcf2.BCFVersion; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFContigHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.vcf.VCFUtils; import java.io.*; import java.util.*; @@ -133,7 +131,7 @@ class BCF2Writer extends IndexingVariantContextWriter { logger.warn("No contig dictionary found in header, falling back to reference sequence dictionary"); createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null)); } else { - throw new UserException.MalformedBCF2("Cannot write BCF2 file with missing contig lines"); + throw new IllegalStateException("Cannot write BCF2 file with missing contig lines"); } } else { createContigDictionary(header.getContigLines()); @@ 
-163,7 +161,7 @@ class BCF2Writer extends IndexingVariantContextWriter { BCF2Type.INT32.write(headerBytes.length, outputStream); outputStream.write(headerBytes); } catch (IOException e) { - throw new UserException.CouldNotCreateOutputFile("BCF2 stream", "Got IOException while trying to write BCF2 header", e); + throw new RuntimeException("BCF2 stream: Got IOException while trying to write BCF2 header", e); } } @@ -183,7 +181,7 @@ class BCF2Writer extends IndexingVariantContextWriter { writeBlock(infoBlock, genotypesBlock); } catch ( IOException e ) { - throw new UserException("Error writing record to BCF2 file: " + vc.toString(), e); + throw new RuntimeException("Error writing record to BCF2 file: " + vc.toString(), e); } } @@ -194,7 +192,7 @@ class BCF2Writer extends IndexingVariantContextWriter { outputStream.close(); } catch ( IOException e ) { - throw new UserException("Failed to close BCF2 file"); + throw new RuntimeException("Failed to close BCF2 file"); } super.close(); } @@ -214,7 +212,7 @@ class BCF2Writer extends IndexingVariantContextWriter { private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getChr()); if ( contigIndex == -1 ) - throw new UserException(String.format("Contig %s not found in sequence dictionary from reference", vc.getChr())); + throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getChr())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); @@ -309,7 +307,7 @@ class BCF2Writer extends IndexingVariantContextWriter { for ( Allele allele : vc.getAlleles() ) { final byte[] s = allele.getDisplayBases(); if ( s == null ) - throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele); + throw new IllegalStateException("BUG: BCF2Writer encountered null padded allele" + allele); encoder.encodeTypedString(s); } } @@ -370,7 +368,7 @@ 
class BCF2Writer extends IndexingVariantContextWriter { * @param fieldType */ private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) { - throw new UserException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " + + throw new IllegalStateException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " + vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader"); } @@ -403,7 +401,7 @@ class BCF2Writer extends IndexingVariantContextWriter { // iterate over strings until we find one that needs 16 bits, and break for ( final String string : strings ) { final Integer got = stringDictionaryMap.get(string); - if ( got == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF"); + if ( got == null ) throw new IllegalStateException("Format error: could not find string " + string + " in header as required by BCF"); final int offset = got; offsets.add(offset); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/IndexingVariantContextWriter.java similarity index 81% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/IndexingVariantContextWriter.java index df0feda8b..6311d9e9a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/IndexingVariantContextWriter.java @@ -22,21 +22,19 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.SAMSequenceRecord; import org.broad.tribble.Tribble; import org.broad.tribble.index.DynamicIndexCreator; import org.broad.tribble.index.Index; import org.broad.tribble.index.IndexFactory; import org.broad.tribble.util.LittleEndianOutputStream; -import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.*; @@ -93,16 +91,19 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter { * attempt to close the VCF file */ public void close() { - // try to close the index stream (keep it separate to help debugging efforts) - if ( indexer != null ) { - try { + try { + // try to close the index stream (keep it separate to help debugging efforts) + if ( indexer != null ) { Index index = indexer.finalizeIndex(positionalOutputStream.getPosition()); - IndexDictionaryUtils.setIndexSequenceDictionary(index, refDict); + setIndexSequenceDictionary(index, refDict); index.write(idxStream); idxStream.close(); - } catch (IOException e) { - throw new ReviewedStingException("Unable to close index for " + getStreamName(), e); } + + // close the underlying output stream as well + outputStream.close(); + } catch (IOException e) { + throw new RuntimeException("Unable to close index for " + getStreamName(), e); } } @@ -134,6 +135,17 @@ abstract class IndexingVariantContextWriter implements 
VariantContextWriter { protected static final String writerName(final File location, final OutputStream stream) { return location == null ? stream.toString() : location.getAbsolutePath(); } + + // a constant we use for marking sequence dictionary entries in the Tribble index property list + private static final String SequenceDictionaryPropertyPredicate = "DICT:"; + + private static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) { + for ( SAMSequenceRecord seq : dict.getSequences() ) { + final String contig = SequenceDictionaryPropertyPredicate + seq.getSequenceName(); + final String length = String.valueOf(seq.getSequenceLength()); + index.addProperty(contig,length); + } + } } final class PositionalOutputStream extends OutputStream { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IntGenotypeFieldAccessors.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/IntGenotypeFieldAccessors.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IntGenotypeFieldAccessors.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/IntGenotypeFieldAccessors.java index fc117e5e1..e1d07d6f7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IntGenotypeFieldAccessors.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/IntGenotypeFieldAccessors.java @@ -22,10 +22,10 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.variantcontext.Genotype; import java.util.HashMap; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/Options.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/Options.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/Options.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/Options.java index 3a5cb23b0..7aa7fb123 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/Options.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/Options.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; /** * Available writer options for VariantContextWriters diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriter.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/SortingVariantContextWriter.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriter.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/SortingVariantContextWriter.java index b51892b31..4de130514 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriter.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/SortingVariantContextWriter.java @@ -23,9 +23,9 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * this class writes VCF files, allowing records to be passed in unsorted (up to a certain genomic distance away) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/SortingVariantContextWriterBase.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/SortingVariantContextWriterBase.java index 1f3cdd0fe..413afa0fd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/SortingVariantContextWriterBase.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/SortingVariantContextWriterBase.java @@ -23,10 +23,10 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.*; import java.util.concurrent.PriorityBlockingQueue; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/VCFWriter.java similarity index 81% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/VCFWriter.java index 974e50ced..175434853 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/VCFWriter.java @@ -22,18 +22,17 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.vcf.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.*; import java.lang.reflect.Array; +import java.nio.charset.Charset; import java.util.*; /** @@ -42,9 +41,6 @@ import java.util.*; class VCFWriter extends IndexingVariantContextWriter { private final static String VERSION_LINE = VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString(); - // the print stream we're writing to - final protected BufferedWriter mWriter; - // should we write genotypes or just sites? final protected boolean doNotWriteGenotypes; @@ -53,15 +49,33 @@ class VCFWriter extends IndexingVariantContextWriter { final private boolean allowMissingFieldsInHeader; + /** + * The VCF writer uses an internal Writer, based by the ByteArrayOutputStream lineBuffer, + * to temp. buffer the header and per-site output before flushing the per line output + * in one go to the super.getOutputStream. This results in high-performance, proper encoding, + * and allows us to avoid flushing explicitly the output stream getOutputStream, which + * allows us to properly compress vcfs in gz format without breaking indexing on the fly + * for uncompressed streams. + */ + private static final int INITIAL_BUFFER_SIZE = 1024 * 16; + private final ByteArrayOutputStream lineBuffer = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE); + private final Writer writer; + + /** + * The encoding used for VCF files. 
ISO-8859-1 + */ + final private Charset charset; + private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors(); public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes, final boolean allowMissingFieldsInHeader ) { super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing); - mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size this.doNotWriteGenotypes = doNotWriteGenotypes; this.allowMissingFieldsInHeader = allowMissingFieldsInHeader; + this.charset = Charset.forName("ISO-8859-1"); + this.writer = new OutputStreamWriter(lineBuffer, charset); } // -------------------------------------------------------------------------------- @@ -70,14 +84,44 @@ class VCFWriter extends IndexingVariantContextWriter { // // -------------------------------------------------------------------------------- + /** + * Write String s to the internal buffered writer. + * + * flushBuffer() must be called to actually write the data to the true output stream. + * + * @param s the string to write + * @throws IOException + */ + private void write(final String s) throws IOException { + writer.write(s); + } + + /** + * Actually write the line buffer contents to the destination output stream. 
+ * + * After calling this function the line buffer is reset, so the contents of the buffer can be reused + * + * @throws IOException + */ + private void flushBuffer() throws IOException { + writer.flush(); + getOutputStream().write(lineBuffer.toByteArray()); + lineBuffer.reset(); + } + @Override public void writeHeader(VCFHeader header) { // note we need to update the mHeader object after this call because they header // may have genotypes trimmed out of it, if doNotWriteGenotypes is true - mHeader = writeHeader(header, mWriter, doNotWriteGenotypes, getVersionLine(), getStreamName()); + try { + mHeader = writeHeader(header, writer, doNotWriteGenotypes, getVersionLine(), getStreamName()); + flushBuffer(); + } catch ( IOException e ) { + throw new RuntimeException("Couldn't write file " + getStreamName(), e); + } } - public static final String getVersionLine() { + public static String getVersionLine() { return VERSION_LINE; } @@ -125,7 +169,7 @@ class VCFWriter extends IndexingVariantContextWriter { writer.flush(); // necessary so that writing to an output stream will work } catch (IOException e) { - throw new ReviewedStingException("IOException writing the VCF header to " + streamNameForError, e); + throw new RuntimeException("IOException writing the VCF header to " + streamNameForError, e); } return header; @@ -138,10 +182,10 @@ class VCFWriter extends IndexingVariantContextWriter { public void close() { // try to close the vcf stream try { - mWriter.flush(); - mWriter.close(); + // TODO -- would it be useful to null out the line buffer so we don't have it around unnecessarily? 
+ writer.close(); } catch (IOException e) { - throw new ReviewedStingException("Unable to close " + getStreamName(), e); + throw new RuntimeException("Unable to close " + getStreamName(), e); } super.close(); @@ -166,51 +210,51 @@ class VCFWriter extends IndexingVariantContextWriter { Map alleleMap = buildAlleleMap(vc); // CHROM - mWriter.write(vc.getChr()); - mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(vc.getChr()); + write(VCFConstants.FIELD_SEPARATOR); // POS - mWriter.write(String.valueOf(vc.getStart())); - mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(String.valueOf(vc.getStart())); + write(VCFConstants.FIELD_SEPARATOR); // ID String ID = vc.getID(); - mWriter.write(ID); - mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(ID); + write(VCFConstants.FIELD_SEPARATOR); // REF String refString = vc.getReference().getDisplayString(); - mWriter.write(refString); - mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(refString); + write(VCFConstants.FIELD_SEPARATOR); // ALT if ( vc.isVariant() ) { Allele altAllele = vc.getAlternateAllele(0); String alt = altAllele.getDisplayString(); - mWriter.write(alt); + write(alt); for (int i = 1; i < vc.getAlternateAlleles().size(); i++) { altAllele = vc.getAlternateAllele(i); alt = altAllele.getDisplayString(); - mWriter.write(","); - mWriter.write(alt); + write(","); + write(alt); } } else { - mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); + write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); } - mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(VCFConstants.FIELD_SEPARATOR); // QUAL if ( !vc.hasLog10PError() ) - mWriter.write(VCFConstants.MISSING_VALUE_v4); + write(VCFConstants.MISSING_VALUE_v4); else - mWriter.write(formatQualValue(vc.getPhredScaledQual())); - mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(formatQualValue(vc.getPhredScaledQual())); + write(VCFConstants.FIELD_SEPARATOR); // FILTER String filters = getFilterString(vc); - mWriter.write(filters); - 
mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(filters); + write(VCFConstants.FIELD_SEPARATOR); // INFO Map infoFields = new TreeMap(); @@ -229,8 +273,8 @@ class VCFWriter extends IndexingVariantContextWriter { // FORMAT final GenotypesContext gc = vc.getGenotypes(); if ( gc.isLazyWithData() && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() instanceof String ) { - mWriter.write(VCFConstants.FIELD_SEPARATOR); - mWriter.write(((LazyGenotypesContext)gc).getUnparsedGenotypeData().toString()); + write(VCFConstants.FIELD_SEPARATOR); + write(((LazyGenotypesContext) gc).getUnparsedGenotypeData().toString()); } else { List genotypeAttributeKeys = calcVCFGenotypeKeys(vc, mHeader); if ( ! genotypeAttributeKeys.isEmpty() ) { @@ -240,16 +284,17 @@ class VCFWriter extends IndexingVariantContextWriter { final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); - mWriter.write(VCFConstants.FIELD_SEPARATOR); - mWriter.write(genotypeFormatString); + write(VCFConstants.FIELD_SEPARATOR); + write(genotypeFormatString); addGenotypeData(vc, alleleMap, genotypeAttributeKeys); } } - mWriter.write("\n"); + write("\n"); // note that we cannot call flush here if we want block gzipping to work properly // calling flush results in all gzipped blocks for each variant + flushBuffer(); } catch (IOException e) { throw new RuntimeException("Unable to write the VCF object to " + getStreamName(), e); } @@ -305,7 +350,7 @@ class VCFWriter extends IndexingVariantContextWriter { */ private void writeInfoString(Map infoFields) throws IOException { if ( infoFields.isEmpty() ) { - mWriter.write(VCFConstants.EMPTY_INFO_FIELD); + write(VCFConstants.EMPTY_INFO_FIELD); return; } @@ -314,16 +359,16 @@ class VCFWriter extends IndexingVariantContextWriter { if ( isFirst ) isFirst = false; else - mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR); + write(VCFConstants.INFO_FIELD_SEPARATOR); String key = entry.getKey(); - mWriter.write(key); + 
write(key); if ( !entry.getValue().equals("") ) { VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key); if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) { - mWriter.write("="); - mWriter.write(entry.getValue()); + write("="); + write(entry.getValue()); } } } @@ -342,7 +387,7 @@ class VCFWriter extends IndexingVariantContextWriter { final int ploidy = vc.getMaxPloidy(2); for ( String sample : mHeader.getGenotypeSamples() ) { - mWriter.write(VCFConstants.FIELD_SEPARATOR); + write(VCFConstants.FIELD_SEPARATOR); Genotype g = vc.getGenotype(sample); if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy); @@ -351,12 +396,12 @@ class VCFWriter extends IndexingVariantContextWriter { for ( String field : genotypeFormatKeys ) { if ( field.equals(VCFConstants.GENOTYPE_KEY) ) { if ( !g.isAvailable() ) { - throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record"); + throw new IllegalStateException("GTs cannot be missing for some samples if they are available for others in the record"); } writeAllele(g.getAllele(0), alleleMap); for (int i = 1; i < g.getPloidy(); i++) { - mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED); + write(g.isPhased() ? 
VCFConstants.PHASED : VCFConstants.UNPHASED); writeAllele(g.getAllele(i), alleleMap); } @@ -420,8 +465,8 @@ class VCFWriter extends IndexingVariantContextWriter { for (int i = 0; i < attrs.size(); i++) { if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY) ) - mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); - mWriter.write(attrs.get(i)); + write(VCFConstants.GENOTYPE_FIELD_SEPARATOR); + write(attrs.get(i)); } } } @@ -435,7 +480,7 @@ class VCFWriter extends IndexingVariantContextWriter { String encoding = alleleMap.get(allele); if ( encoding == null ) throw new TribbleException.InternalCodecException("Allele " + allele + " is not an allele in the variant context"); - mWriter.write(encoding); + write(encoding); } /** @@ -552,10 +597,9 @@ class VCFWriter extends IndexingVariantContextWriter { private final void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) { if ( !allowMissingFieldsInHeader) - throw new UserException.MalformedVCFHeader("Key " + id + " found in VariantContext field " + field + throw new IllegalStateException("Key " + id + " found in VariantContext field " + field + " at " + vc.getChr() + ":" + vc.getStart() - + " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have" - + " complete VCF headers by default. This error can be disabled with the engine argument" - + " -U LENIENT_VCF_PROCESSING"); + + " but this key isn't defined in the VCFHeader. 
We require all VCFs to have" + + " complete VCF headers by default."); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriter.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/VariantContextWriter.java similarity index 56% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriter.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/VariantContextWriter.java index 0ad78c7e0..4313b044d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriter.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/VariantContextWriter.java @@ -1,7 +1,7 @@ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.variantcontext.VariantContext; /** * this class writes VCF files diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java b/public/java/src/org/broadinstitute/variant/variantcontext/writer/VariantContextWriterFactory.java similarity index 93% rename from public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java rename to public/java/src/org/broadinstitute/variant/variantcontext/writer/VariantContextWriterFactory.java index 035aff7d6..0c98567a1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWriterFactory.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/writer/VariantContextWriterFactory.java @@ -22,10 +22,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import net.sf.samtools.SAMSequenceDictionary; -import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; import java.io.FileNotFoundException; @@ -107,7 +106,7 @@ public class VariantContextWriterFactory { } /** - * Returns a output stream writing to location, or throws a UserException if this fails + * Returns a output stream writing to location, or throws an exception if this fails * @param location * @return */ @@ -115,7 +114,7 @@ public class VariantContextWriterFactory { try { return new FileOutputStream(location); } catch (FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(location, "Unable to create VCF writer", e); + throw new RuntimeException(location + ": Unable to create VCF writer", e); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java rename to public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java index 652f7f96f..3bad6b064 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/variant/vcf/AbstractVCFCodec.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.apache.log4j.Logger; import org.broad.tribble.AsciiFeatureCodec; @@ -8,9 +8,7 @@ import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.BlockCompressedInputStream; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import 
org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -148,7 +146,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec sampleNames.add(strings[arrayIndex++]); if ( sawFormatTag && sampleNames.size() == 0 ) - throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data"); + throw new TribbleException.InvalidHeader("The FORMAT field was provided but there is no genotype/sample data"); } else { if ( str.startsWith(VCFConstants.INFO_HEADER_START) ) { @@ -203,7 +201,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; // our header cannot be null, we need the genotype sample names and counts - if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record"); + if (header == null) throw new TribbleException("VCF Header cannot be null when decoding a record"); if (parts == null) parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)]; @@ -213,8 +211,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) || (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) - throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + - " tokens, and saw " + nParts + " )", lineNo); + throw new TribbleException("Line " + lineNo + ": there aren't enough columns for line " + line + " (we expected " + (header == null ? 
NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + + " tokens, and saw " + nParts + " )"); return parseVCFLine(parts, includeGenotypes); } @@ -692,10 +690,10 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec protected void generateException(String message) { - throw new UserException.MalformedVCF(message, lineNo); + throw new TribbleException(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message)); } protected static void generateException(String message, int lineNo) { - throw new UserException.MalformedVCF(message, lineNo); + throw new TribbleException(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/variant/vcf/VCF3Codec.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java rename to public/java/src/org/broadinstitute/variant/vcf/VCF3Codec.java index 2c103b473..6cbd26de9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCF3Codec.java @@ -22,12 +22,10 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; -import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.variantcontext.*; import java.io.IOException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/variant/vcf/VCFCodec.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFCodec.java index f12f13dc7..6a31727ee 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFCodec.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFCompoundHeaderLine.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFCompoundHeaderLine.java index 5273806a7..68252f01b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFCompoundHeaderLine.java @@ -22,14 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.apache.log4j.Logger; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.LinkedHashMap; @@ -65,7 +63,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF public boolean isFixedCount() { return countType == VCFHeaderLineCount.INTEGER; } public int getCount() { if ( ! isFixedCount() ) - throw new ReviewedStingException("Asking for header line count when type is not an integer"); + throw new TribbleException("Asking for header line count when type is not an integer"); return count; } @@ -91,7 +89,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF final int ploidy = vc.getMaxPloidy(2); return GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), ploidy); default: - throw new ReviewedStingException("Unknown count type: " + countType); + throw new TribbleException("Unknown count type: " + countType); } } @@ -172,7 +170,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF } if ( count < 0 && countType == VCFHeaderLineCount.INTEGER ) - throw new UserException.MalformedVCFHeader("Count < 0 for fixed size VCF header field " + name); + throw new TribbleException.InvalidHeader("Count < 0 for fixed size VCF header field " + name); try { type = VCFHeaderLineType.valueOf(mapping.get("Type")); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java 
b/public/java/src/org/broadinstitute/variant/vcf/VCFConstants.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFConstants.java index dac58eb10..09e74cff6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFConstants.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import java.util.Locale; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFContigHeaderLine.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFContigHeaderLine.java index 35cc75af2..f4bcd69bf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFContigHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFContigHeaderLine.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFFilterHeaderLine.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFFilterHeaderLine.java index bfc484a7e..ff5352404 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFilterHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFFilterHeaderLine.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import java.util.Arrays; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFFormatHeaderLine.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFFormatHeaderLine.java index 5fc3187c5..5d749f37d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFFormatHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFFormatHeaderLine.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/variant/vcf/VCFHeader.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFHeader.java index 44a3e9af3..46f9187a1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ 
b/public/java/src/org/broadinstitute/variant/vcf/VCFHeader.java @@ -22,11 +22,11 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.apache.log4j.Logger; +import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; @@ -124,7 +124,7 @@ public class VCFHeader { this(metaData); if ( genotypeSampleNames.size() != new HashSet(genotypeSampleNames).size() ) - throw new ReviewedStingException("BUG: VCF header has duplicate sample names"); + throw new TribbleException.InvalidHeader("BUG: VCF header has duplicate sample names"); mGenotypeSampleNames.addAll(genotypeSampleNames); samplesWereAlreadySorted = ParsingUtils.isSorted(genotypeSampleNames); @@ -234,7 +234,7 @@ public class VCFHeader { if ( hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && ! hasFormatLine(VCFConstants.GENOTYPE_PL_KEY) ) { logger.warn("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no " - + VCFConstants.GENOTYPE_PL_KEY + " field. As the GATK now only manages PL fields internally" + + VCFConstants.GENOTYPE_PL_KEY + " field. 
We now only manage PL fields internally" + " automatically adding a corresponding PL field to your VCF header"); addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLine.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLine.java index 9b5886c65..3333fd455 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLine.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.broad.tribble.TribbleException; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineCount.java similarity index 72% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineCount.java index d615c7c78..2bef149a2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineCount.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineCount.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; /** * the count encodings we use for fields in VCF header lines diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineTranslator.java similarity index 93% 
rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineTranslator.java index dcc38a6f9..1b20c52ff 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineTranslator.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineTranslator.java @@ -1,6 +1,6 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broad.tribble.TribbleException; import java.util.*; @@ -77,10 +77,10 @@ class VCF4Parser implements VCFLineParser { index = 0; if ( expectedTagOrder != null ) { if ( ret.size() > expectedTagOrder.size() ) - throw new UserException.MalformedVCFHeader("unexpected tag count " + ret.size() + " in line " + valueLine); + throw new TribbleException.InvalidHeader("unexpected tag count " + ret.size() + " in line " + valueLine); for ( String str : ret.keySet() ) { if ( !expectedTagOrder.get(index).equals(str) ) - throw new UserException.MalformedVCFHeader("Unexpected tag " + str + " in line " + valueLine); + throw new TribbleException.InvalidHeader("Unexpected tag " + str + " in line " + valueLine); index++; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineType.java b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineType.java similarity index 74% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineType.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineType.java index 83e272415..0defd09c5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderLineType.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderLineType.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; /** * the type encodings we use for 
fields in VCF header lines diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderVersion.java b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderVersion.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderVersion.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFHeaderVersion.java index c65ce17b9..16229857e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderVersion.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFHeaderVersion.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.broad.tribble.TribbleException; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFIDHeaderLine.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFIDHeaderLine.java index 65321881a..c18c1b6ea 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFIDHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFIDHeaderLine.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; /** an interface for ID-based header lines **/ public interface VCFIDHeaderLine { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFInfoHeaderLine.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFInfoHeaderLine.java index 9f249c531..7db527efe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFInfoHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFInfoHeaderLine.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; /** diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java b/public/java/src/org/broadinstitute/variant/vcf/VCFSimpleHeaderLine.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFSimpleHeaderLine.java index c9699e7b5..94a1bd20b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFSimpleHeaderLine.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFSimpleHeaderLine.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import java.util.LinkedHashMap; import java.util.List; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java b/public/java/src/org/broadinstitute/variant/vcf/VCFStandardHeaderLines.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFStandardHeaderLines.java index b2e8cc100..5b00cfa37 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFStandardHeaderLines.java @@ -22,12 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broad.tribble.TribbleException; import java.util.*; @@ -76,7 +76,7 @@ public class VCFStandardHeaderLines { /** * Adds header lines for each of the format fields in IDs to header, returning the set of * IDs without standard descriptions, unless throwErrorForMissing is true, in which - * case this situation results in a ReviewedStingException + * case this situation results in a TribbleException * * @param IDs * @return @@ -126,7 +126,7 @@ public class VCFStandardHeaderLines { /** * Adds header lines for each of the info fields in IDs to header, returning the set of * IDs without standard descriptions, unless throwErrorForMissing is true, in which - * case this situation results in a ReviewedStingException + * case this situation results in a TribbleException * * @param IDs * @return @@ -246,7 +246,7 @@ public class VCFStandardHeaderLines { @Ensures({"standards.containsKey(line.getID())"}) public void add(final T line) { if ( standards.containsKey(line.getID()) ) - throw new ReviewedStingException("Attempting to add multiple standard header lines for ID " + line.getID()); + throw new TribbleException("Attempting to add multiple standard header lines for ID " + line.getID()); standards.put(line.getID(), line); } @@ -255,7 +255,7 @@ public class VCFStandardHeaderLines { public T get(final String ID, final boolean throwErrorForMissing) { final T x = standards.get(ID); if ( throwErrorForMissing && x == null ) - throw new ReviewedStingException("Couldn't find a 
standard VCF header line for field " + ID); + throw new TribbleException("Couldn't find a standard VCF header line for field " + ID); return x; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/variant/vcf/VCFUtils.java old mode 100755 new mode 100644 similarity index 57% rename from public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java rename to public/java/src/org/broadinstitute/variant/vcf/VCFUtils.java index a8aefb703..ccb9935cb --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFUtils.java @@ -1,160 +1,15 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.commons.io.FilenameUtils; import org.apache.log4j.Logger; -import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodecHeader; -import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; import java.util.*; -/** - * A set of static utility methods for common operations on VCF files/records. - */ public class VCFUtils { - /** - * Constructor access disallowed...static utility methods only! 
- */ - private VCFUtils() { } - - public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, List> rodBindings) { - // Collect the eval rod names - final Set names = new TreeSet(); - for ( final RodBinding evalRod : rodBindings ) - names.add(evalRod.getName()); - return getVCFHeadersFromRods(toolkit, names); - } - - public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit) { - return getVCFHeadersFromRods(toolkit, (Collection)null); - } - - public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection rodNames) { - Map data = new HashMap(); - - // iterate to get all of the sample names - List dataSources = toolkit.getRodDataSources(); - for ( ReferenceOrderedDataSource source : dataSources ) { - // ignore the rod if it's not in our list - if ( rodNames != null && !rodNames.contains(source.getName()) ) - continue; - - if ( source.getHeader() != null && source.getHeader() instanceof VCFHeader ) - data.put(source.getName(), (VCFHeader)source.getHeader()); - } - - return data; - } - - public static Map getVCFHeadersFromRodPrefix(GenomeAnalysisEngine toolkit,String prefix) { - Map data = new HashMap(); - - // iterate to get all of the sample names - List dataSources = toolkit.getRodDataSources(); - for ( ReferenceOrderedDataSource source : dataSources ) { - // ignore the rod if lacks the prefix - if ( ! 
source.getName().startsWith(prefix) ) - continue; - - if ( source.getHeader() != null && source.getHeader() instanceof VCFHeader ) - data.put(source.getName(), (VCFHeader)source.getHeader()); - } - - return data; - } - - /** - * Gets the header fields from all VCF rods input by the user - * - * @param toolkit GATK engine - * - * @return a set of all fields - */ - public static Set getHeaderFields(GenomeAnalysisEngine toolkit) { - return getHeaderFields(toolkit, null); - } - - /** - * Gets the header fields from all VCF rods input by the user - * - * @param toolkit GATK engine - * @param rodNames names of rods to use, or null if we should use all possible ones - * - * @return a set of all fields - */ - public static Set getHeaderFields(GenomeAnalysisEngine toolkit, Collection rodNames) { - - // keep a map of sample name to occurrences encountered - TreeSet fields = new TreeSet(); - - // iterate to get all of the sample names - List dataSources = toolkit.getRodDataSources(); - for ( ReferenceOrderedDataSource source : dataSources ) { - // ignore the rod if it's not in our list - if ( rodNames != null && !rodNames.contains(source.getName()) ) - continue; - - if ( source.getRecordType().equals(VariantContext.class)) { - VCFHeader header = (VCFHeader)source.getHeader(); - if ( header != null ) - fields.addAll(header.getMetaDataInSortedOrder()); - } - } - - return fields; - } - - /** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */ - private static final class HeaderConflictWarner { - Logger logger; - Set alreadyIssued = new HashSet(); - - private HeaderConflictWarner(final Logger logger) { - this.logger = logger; - } - - public void warn(final VCFHeaderLine line, final String msg) { - if ( logger != null && ! 
alreadyIssued.contains(line.getKey()) ) { - alreadyIssued.add(line.getKey()); - logger.warn(msg); - } - } - } public static Set smartMergeHeaders(Collection headers, Logger logger) throws IllegalStateException { HashMap map = new HashMap(); // from KEY.NAME -> line @@ -236,16 +91,6 @@ public class VCFUtils { return rsID; } - /** - * Add / replace the contig header lines in the VCFHeader with the information in the GATK engine - * - * @param header the header to update - * @param engine the GATK engine containing command line arguments and the master sequence dictionary - */ - public static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) { - return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); - } - /** * Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary * @@ -323,32 +168,20 @@ public class VCFUtils { return assembly; } - /** - * Read all of the VCF records from source into memory, returning the header and the VariantContexts - * - * @param source the file to read, must be in VCF4 format - * @return - * @throws IOException - */ - public static Pair> readVCF(final File source) throws IOException { - // read in the features - final List vcs = new ArrayList(); - final VCFCodec codec = new VCFCodec(); - PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); - FeatureCodecHeader header = codec.readHeader(pbs); - pbs.close(); + /** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */ + private static final class HeaderConflictWarner { + Logger logger; + Set alreadyIssued = new HashSet(); - pbs = new PositionalBufferedStream(new FileInputStream(source)); - pbs.skip(header.getHeaderEnd()); - - final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue(); - - while ( ! 
pbs.isDone() ) { - final VariantContext vc = codec.decode(pbs); - if ( vc != null ) - vcs.add(vc); + private HeaderConflictWarner(final Logger logger) { + this.logger = logger; } - return new Pair>(vcfHeader, vcs); + public void warn(final VCFHeaderLine line, final String msg) { + if ( logger != null && ! alreadyIssued.contains(line.getKey()) ) { + alreadyIssued.add(line.getKey()); + logger.warn(msg); + } + } } -} \ No newline at end of file +} diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index fa9f9e8a7..b36400a92 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.variant.bcf2.BCF2Utils; +import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider; +import org.broadinstitute.variant.variantcontext.VariantContextTestProvider; import org.testng.Assert; import org.testng.annotations.AfterSuite; import org.testng.annotations.BeforeMethod; diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index be2e91557..67031a586 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.commandline; import org.apache.commons.io.FileUtils; import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java index 206f32532..307bf41a3 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.Test; import org.testng.annotations.BeforeMethod; diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index 2f73e373c..c520399e4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -36,11 +36,10 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import 
org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.*; import java.util.*; diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index 48e4ff4ed..c01ed61c0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -32,11 +32,11 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.codecs.table.BedTableCodec; import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCF3Codec; +import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java 
b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java index 6264758ad..60a941a9e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java @@ -29,8 +29,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.Tribble; import org.broad.tribble.index.Index; -import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCF3Codec; +import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java index c00fbbcdb..c4e566135 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIteratorUnitTest.java @@ -29,7 +29,7 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 20071acca..d20b70b42 100644 --- 
a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -168,7 +168,7 @@ public class GATKReportUnitTest extends BaseTest { table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "TrueFalse", true); - report1.addTable("Table3", "blah", 1, true); + report1.addTable("Table3", "blah", 1, true, false); report1.getTable("Table3").addColumn("a"); report1.getTable("Table3").addRowIDMapping("q", 2); report1.getTable("Table3").addRowIDMapping("5", 3); diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java index 69907d485..185eca85c 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java @@ -2,6 +2,10 @@ package org.broadinstitute.sting.gatk.traversals; import com.google.java.contract.PreconditionError; import net.sf.samtools.*; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.datasources.reads.*; +import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; @@ -12,11 +16,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; -import 
org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.executive.WindowMaker; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -101,7 +102,8 @@ public class TraverseActiveRegionsTest extends BaseTest { private GenomeLocParser genomeLocParser; private List intervals; - private List reads; + + private static final String testBAM = "TraverseActiveRegionsTest.bam"; @BeforeClass private void init() throws FileNotFoundException { @@ -110,6 +112,13 @@ public class TraverseActiveRegionsTest extends BaseTest { genomeLocParser = new GenomeLocParser(dictionary); // TODO: test shard boundaries + // TODO: reads with indels + // TODO: reads which span many regions + // TODO: reads which are partially between intervals (in/outside extension) + // TODO: duplicate reads + + // TODO: should we assign reads which are completely outside intervals but within extension? 
+ intervals = new ArrayList(); intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20)); @@ -117,24 +126,34 @@ public class TraverseActiveRegionsTest extends BaseTest { intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999)); intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999)); intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000)); - intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100)); intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100)); intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList(); - reads = new ArrayList(); + List reads = new ArrayList(); reads.add(buildSAMRecord("simple", "1", 100, 200)); reads.add(buildSAMRecord("overlap_equal", "1", 10, 20)); reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21)); reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009)); reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008)); + reads.add(buildSAMRecord("boundary_1_pre", "1", 1950, 2000)); + reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050)); reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990)); reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000)); - reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700)); reads.add(buildSAMRecord("simple20", "20", 10025, 10075)); - // required by LocusIteratorByState, and I prefer to list them in test case order above - ReadUtils.sortReadsByCoordinate(reads); + createBAM(reads); + } + + private void createBAM(List reads) { + File outFile = new File(testBAM); + outFile.deleteOnExit(); + + SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile); + for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) { + out.addAlignment(read); + } + out.close(); } @Test @@ -148,7 +167,7 @@ public class TraverseActiveRegionsTest extends 
BaseTest { private List getIsActiveIntervals(DummyActiveRegionWalker walker, List intervals) { List activeIntervals = new ArrayList(); - for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) { + for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) { t.traverse(walker, dataProvider, 0); activeIntervals.addAll(walker.isActiveCalls); } @@ -230,73 +249,26 @@ public class TraverseActiveRegionsTest extends BaseTest { // overlap_unequal: Primary in 1:1-999 // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_1_pre: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none - // end_of_chr1: Primary in 1:249250600-249250621 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); ActiveRegion region; region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); - - getRead(region, "simple"); - getRead(region, "overlap_equal"); - getRead(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "simple", "overlap_equal", "overlap_unequal"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - getRead(region, "boundary_unequal"); - getRead(region, "extended_and_np"); - verifyReadNotPlaced(region, 
"outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "boundary_unequal", "extended_and_np", "boundary_1_pre"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - getRead(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); - - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - getRead(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "boundary_equal", "boundary_1_post"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - getRead(region, "simple20"); + verifyReadMapping(region, "simple20"); } @Test @@ -314,73 +286,26 @@ public class TraverseActiveRegionsTest extends BaseTest { // overlap_unequal: Primary in 1:1-999 // boundary_equal: Non-Primary in 1:1000-1999, Primary in 
1:2000-2999 // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_1_pre: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none - // end_of_chr1: Primary in 1:249250600-249250621 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); ActiveRegion region; region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); - - getRead(region, "simple"); - getRead(region, "overlap_equal"); - getRead(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - getRead(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "simple", "overlap_equal", "overlap_unequal", "extended_and_np"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - getRead(region, "boundary_equal"); - getRead(region, "boundary_unequal"); - getRead(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - getRead(region, "boundary_equal"); - getRead(region, "boundary_unequal"); - verifyReadNotPlaced(region, 
"extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); - - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - getRead(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - getRead(region, "simple20"); + verifyReadMapping(region, "simple20"); } @Test @@ -399,73 +324,26 @@ public class TraverseActiveRegionsTest extends BaseTest { // overlap_unequal: Primary in 1:1-999 // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_1_pre: Primary in 1:1000-1999, Non-Primary in 1:2000-2999 + // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999 // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999 // outside_intervals: none - // end_of_chr1: Primary in 1:249250600-249250621 // simple20: Primary in 20:10000-10100 Map activeRegions = getActiveRegions(walker, intervals); 
ActiveRegion region; region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999)); - - getRead(region, "simple"); - getRead(region, "overlap_equal"); - getRead(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - getRead(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "simple", "overlap_equal", "overlap_unequal", "extended_and_np"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - getRead(region, "boundary_equal"); - getRead(region, "boundary_unequal"); - getRead(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post"); region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - getRead(region, "boundary_equal"); - getRead(region, "boundary_unequal"); - getRead(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); - - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - 
verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - getRead(region, "end_of_chr1"); - verifyReadNotPlaced(region, "simple20"); + verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); - - verifyReadNotPlaced(region, "simple"); - verifyReadNotPlaced(region, "overlap_equal"); - verifyReadNotPlaced(region, "overlap_unequal"); - verifyReadNotPlaced(region, "boundary_equal"); - verifyReadNotPlaced(region, "boundary_unequal"); - verifyReadNotPlaced(region, "extended_and_np"); - verifyReadNotPlaced(region, "outside_intervals"); - verifyReadNotPlaced(region, "end_of_chr1"); - getRead(region, "simple20"); + verifyReadMapping(region, "simple20"); } @Test @@ -473,25 +351,19 @@ public class TraverseActiveRegionsTest extends BaseTest { // TODO } - private void verifyReadNotPlaced(ActiveRegion region, String readName) { + private void verifyReadMapping(ActiveRegion region, String... 
reads) { + Collection wantReads = new ArrayList(Arrays.asList(reads)); for (SAMRecord read : region.getReads()) { - if (read.getReadName().equals(readName)) - Assert.fail("Read " + readName + " found in active region " + region); - } - } - - private SAMRecord getRead(ActiveRegion region, String readName) { - for (SAMRecord read : region.getReads()) { - if (read.getReadName().equals(readName)) - return read; + String regionReadName = read.getReadName(); + Assert.assertTrue(wantReads.contains(regionReadName), "Read " + regionReadName + " assigned to active region " + region); + wantReads.remove(regionReadName); } - Assert.fail("Read " + readName + " not assigned to active region " + region); - return null; + Assert.assertTrue(wantReads.isEmpty(), "Reads missing in active region " + region); } private Map getActiveRegions(DummyActiveRegionWalker walker, List intervals) { - for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) + for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) t.traverse(walker, dataProvider, 0); t.endTraversal(walker, 0); @@ -536,7 +408,7 @@ public class TraverseActiveRegionsTest extends BaseTest { // copied from LocusViewTemplate protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { - SAMFileHeader header = new SAMFileHeader(); + SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); header.setSequenceDictionary(dictionary); GATKSAMRecord record = new GATKSAMRecord(header); @@ -548,23 +420,28 @@ public class TraverseActiveRegionsTest extends BaseTest { int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); - record.setReadBases(new byte[len]); + record.setReadString(new String(new char[len]).replace("\0", "A")); record.setBaseQualities(new byte[len]); return record; } - private List createDataProviders(List intervals) { + 
private List createDataProviders(List intervals, String bamFile) { GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); engine.setGenomeLocParser(genomeLocParser); t.initialize(engine); - StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList(reads)); - Shard shard = new MockLocusShard(genomeLocParser, intervals); + Collection samFiles = new ArrayList(); + SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags()); + samFiles.add(readerID); + + SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser); List providers = new ArrayList(); - for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) { - providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); + for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) { + for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) { + providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList())); + } } return providers; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java index e16ef3125..b457698e9 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java @@ -6,6 +6,9 @@ import org.testng.annotations.Test; import java.util.Arrays; public class PileupWalkerIntegrationTest extends WalkerTest { + String gatkSpeedupArgs="-T Pileup -I " + validationDataLocation + 
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam " + + "-R " + hg19Reference + " -o %s "; + @Test public void testGnarleyFHSPileup() { String gatk_args = "-T Pileup -I " + validationDataLocation + "FHS_Pileup_Test.bam " @@ -39,4 +42,31 @@ public class PileupWalkerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5)); executeTest("Testing single read spanning off chromosome 1 unindexed", spec); } + + /************************/ + + //testing speedup to GATKBAMIndex + + + @Test + public void testPileupOnLargeBamChr20(){ + WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050", 1, Arrays.asList("8702701350de11a6d28204acefdc4775")); + executeTest("Testing single on big BAM at start of chromosome 20", spec); + } + @Test + public void testPileupOnLargeBamMid20(){ + WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:10,000,000-10,001,100", 1, Arrays.asList("818cf5a8229efe6f89fc1cd8145ccbe3")); + executeTest("Testing single on big BAM somewhere in chromosome 20", spec); + } + @Test + public void testPileupOnLargeBamEnd20(){ + WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:62,954,114-63,025,520", 1, Arrays.asList("22471ea4a12e5139aef62bf8ff2a5b63")); + executeTest("Testing single at end of chromosome 20", spec); + } + @Test + public void testPileupOnLargeBam20Many(){ + WalkerTestSpec spec = new WalkerTestSpec(gatkSpeedupArgs + "-L 20:1-76,050 -L 20:20,000,000-20,000,100 -L 20:40,000,000-40,000,100 -L 20:30,000,000-30,000,100 -L 20:50,000,000-50,000,100 -L 20:62,954,114-63,025,520 ", + 1, Arrays.asList("08d899ed7c5a76ef3947bf67338acda1")); + executeTest("Testing single on big BAM many places", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java index c1c22aceb..d50a8f0ae 
100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -30,8 +30,8 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.variantcontext.Allele; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index f7f7999be..39b81f4a2 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -29,7 +29,7 @@ import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; @@ -37,10 +37,8 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import 
org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import java.util.*; @@ -56,11 +54,11 @@ public class ArtificialReadPileupTestProvider { final String artificialReadName = "synth"; final int artificialRefStart = 1; final int artificialMappingQuality = 60; - Map sample2RG = new HashMap(); + Map sample2RG = new HashMap(); List sampleRGs; List sampleNames = new ArrayList(); private String sampleName(int i) { return sampleNames.get(i); } - private SAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); } + private GATKSAMReadGroupRecord sampleRG(String name) { return sample2RG.get(name); } public final int locStart = 105; // start position where we desire artificial variant private final int readLength = 10; // desired read length in pileup public final int readOffset = 4; @@ -78,7 +76,7 @@ public class ArtificialReadPileupTestProvider { for ( int i = 0; i < numSamples; i++ ) { sampleNames.add(String.format("%s%04d", SAMPLE_PREFIX, i)); - SAMReadGroupRecord rg = createRG(sampleName(i)); + GATKSAMReadGroupRecord rg = createRG(sampleName(i)); sampleRGs.add(rg); sample2RG.put(sampleName(i), rg); } @@ -137,8 +135,8 @@ public class ArtificialReadPileupTestProvider { return contexts; } - private SAMReadGroupRecord createRG(String name) { - SAMReadGroupRecord rg = new SAMReadGroupRecord(name); + private GATKSAMReadGroupRecord createRG(String name) { + GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(name); rg.setPlatform("ILLUMINA"); rg.setSample(name); return rg; @@ -192,7 +190,7 @@ public class ArtificialReadPileupTestProvider { read.setMappingQuality(artificialMappingQuality); read.setReferenceName(loc.getContig()); read.setReadNegativeStrandFlag(false); - read.setAttribute("RG", sampleRG(sample).getReadGroupId()); + 
read.setReadGroup(sampleRG(sample)); pileupElements.add(new PileupElement(read,readOffset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases,Math.abs(eventLength))); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java index 85528f58b..12662c748 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java @@ -1,39 +1,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMReadGroupRecord; -import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import 
org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; import java.util.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import org.testng.Assert; -import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeSuite; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 9b0fbf650..4208f6c07 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("f4b0b5471e03306ee2fad27d88b217b6","f8721f4f5d3bae2848ae15c3f120709b") + Arrays.asList("af979bcb353edda8dee2127605c71daf","1ea9994f937012e8de599ec7bcd62a0e") ); executeTest("testTrueNegativeMV", spec); } @@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("dbc64776dcc9e01a468b61e4e0db8277","547fdfef393f3045a96d245ef6af8acb") + Arrays.asList("1dc36ff8d1d5f5d2c1c1bf21517263bf","547fdfef393f3045a96d245ef6af8acb") ); executeTest("testTruePositiveMV", spec); } @@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("37793e78861bb0bc070884da67dc10e6","9529e2bf214d72e792d93fbea22a3b91") + 
Arrays.asList("ae60f2db6102ca1f4e93cd18d0634d7a","9529e2bf214d72e792d93fbea22a3b91") ); executeTest("testFalsePositiveMV", spec); } @@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("e4da7639bb542d6440975da12b94973f","8c157d79dd00063d2932f0d2b96f53d8") + Arrays.asList("590ee56e745984296f73e4277277eac7","8c157d79dd00063d2932f0d2b96f53d8") ); executeTest("testSpecialCases", spec); } @@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("ab92b714471a000285577d540e1fdc2e","343e418850ae4a687ebef2acd55fcb07") + Arrays.asList("78158d738917b8f0b7a736a1739b2cc5","343e418850ae4a687ebef2acd55fcb07") ); executeTest("testPriorOption", spec); } @@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-fatherAlleleFirst" ), 2, - Arrays.asList("4b937c1b4e96602a7479b07b59254d06","52ffa82428e63ade22ea37b72ae58492") + Arrays.asList("dc6afb769b55e6038677fa590b2b2e89","52ffa82428e63ade22ea37b72ae58492") ); executeTest("testFatherAlleleFirst", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java index f6c12f443..24903bba4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalkerUnitTest.java @@ -37,9 +37,9 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.Variant import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager; import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import 
org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java index 21d49638f..a400a004a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java @@ -2,12 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.variant.vcf.*; import org.testng.Assert; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderUnitTest; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index a1d673b56..f21b662e2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -262,7 +262,7 @@ 
public class SelectVariantsIntegrationTest extends WalkerTest { @Test() public void testFileWithoutInfoLineInHeader() { - testFileWithoutInfoLineInHeader("testFileWithoutInfoLineInHeader", UserException.class); + testFileWithoutInfoLineInHeader("testFileWithoutInfoLineInHeader", IllegalStateException.class); } @Test() diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java index 13db1d39e..7350e0076 100644 --- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -30,9 +30,9 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java index 489adab6b..9ccfb9229 100644 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java @@ -1,19 +1,13 @@ package org.broadinstitute.sting.utils.nanoScheduler; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.MultiThreadedErrorTracker; import org.testng.Assert; import 
org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.ArrayList; import java.util.Arrays; -import java.util.Iterator; import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.LinkedBlockingDeque; -import java.util.concurrent.Semaphore; /** * UnitTests for the InputProducer @@ -42,34 +36,23 @@ public class InputProducerUnitTest extends BaseTest { final List elements = new ArrayList(nElements); for ( int i = 0; i < nElements; i++ ) elements.add(i); - final LinkedBlockingDeque.InputValue> readQueue = - new LinkedBlockingDeque.InputValue>(queueSize); - - final InputProducer ip = new InputProducer(elements.iterator(), new MultiThreadedErrorTracker(), readQueue); - - final ExecutorService es = Executors.newSingleThreadExecutor(); + final InputProducer ip = new InputProducer(elements.iterator()); Assert.assertFalse(ip.allInputsHaveBeenRead(), "InputProvider said that all inputs have been read, but I haven't started reading yet"); Assert.assertEquals(ip.getNumInputValues(), -1, "InputProvider told me that the queue was done, but I haven't started reading yet"); - es.submit(ip); - int lastValue = -1; int nRead = 0; - while ( true ) { + while ( ip.hasNext() ) { final int nTotalElements = ip.getNumInputValues(); - final int observedQueueSize = readQueue.size(); - Assert.assertTrue(observedQueueSize <= queueSize, - "Reader is enqueuing more elements " + observedQueueSize + " than allowed " + queueSize); - if ( nRead + observedQueueSize < nElements ) + if ( nRead < nElements ) Assert.assertEquals(nTotalElements, -1, "getNumInputValues should have returned -1 with not all elements read"); // note, cannot test else case because elements input could have emptied between calls - final InputProducer.InputValue value = readQueue.take(); + final InputProducer.InputValue value = ip.next(); if ( value.isEOFMarker() ) { Assert.assertEquals(nRead, nElements, "Number of 
input values " + nRead + " not all that are expected " + nElements); - Assert.assertEquals(readQueue.size(), 0, "Last queue element found but queue contains more values!"); break; } else { Assert.assertTrue(lastValue < value.getValue(), "Read values coming out of order!"); @@ -82,65 +65,5 @@ public class InputProducerUnitTest extends BaseTest { Assert.assertTrue(ip.allInputsHaveBeenRead(), "InputProvider said that all inputs haven't been read, but I read them all"); Assert.assertEquals(ip.getNumInputValues(), nElements, "Wrong number of total elements getNumInputValues"); - es.shutdownNow(); - } - - @Test(enabled = true, dataProvider = "InputProducerTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME) - public void testInputProducerLocking(final int nElements, final int queueSize) throws InterruptedException { - final List elements = new ArrayList(nElements); - for ( int i = 0; i < nElements; i++ ) elements.add(i); - - final LinkedBlockingDeque.InputValue> readQueue = - new LinkedBlockingDeque.InputValue>(); - - final InputProducer ip = new InputProducer(elements.iterator(), new MultiThreadedErrorTracker(), readQueue); - - final ExecutorService es = Executors.newSingleThreadExecutor(); - es.submit(ip); - - ip.waitForDone(); - - Assert.assertEquals(ip.getNumInputValues(), nElements, "InputProvider told me that the queue was done, but I haven't started reading yet"); - Assert.assertEquals(readQueue.size(), nElements + 1, "readQueue should have had all elements read into it"); - } - - final static class BlockingIterator implements Iterator { - final Semaphore blockNext = new Semaphore(0); - final Semaphore blockOnNext = new Semaphore(0); - final Iterator underlyingIterator; - - BlockingIterator(Iterator underlyingIterator) { - this.underlyingIterator = underlyingIterator; - } - - public void allowNext() { - blockNext.release(1); - } - - public void blockTillNext() throws InterruptedException { - blockOnNext.acquire(1); - } - - @Override - public boolean 
hasNext() { - return underlyingIterator.hasNext(); - } - - @Override - public T next() { - try { - blockNext.acquire(1); - T value = underlyingIterator.next(); - blockOnNext.release(1); - return value; - } catch (InterruptedException ex) { - throw new RuntimeException(ex); - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("x"); - } } } diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/MapResultUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/MapResultUnitTest.java new file mode 100644 index 000000000..93fe9578f --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/MapResultUnitTest.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.nanoScheduler; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** +* UnitTests for the InputProducer +* +* User: depristo +* Date: 8/24/12 +* Time: 11:25 AM +* To change this template use File | Settings | File Templates. +*/ +public class MapResultUnitTest { + @DataProvider(name = "CompareTester") + public Object[][] createCompareTester() { + List tests = new ArrayList(); + + for ( int id1 = 0; id1 < 10; id1++ ) { + for ( int id2 = 0; id2 < 10; id2++ ) { + tests.add(new Object[]{ id1, id2, Integer.valueOf(id1).compareTo(id2)}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = true, dataProvider = "CompareTester") + public void testInputProducer(final int id1, final int id2, final int comp ) throws InterruptedException { + final MapResult mr1 = new MapResult(id1, id1); + final MapResult mr2 = new MapResult(id2, id2); + Assert.assertEquals(mr1.compareTo(mr2), comp, "Compare MapResultsUnitTest failed"); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java index 61e8ec0a1..52cd904db 100644 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java @@ -101,7 +101,7 @@ public class NanoSchedulerUnitTest extends BaseTest { public int nExpectedCallbacks() { int nElements = Math.max(end - start, 0); - return nElements / bufferSize; + return nElements / bufferSize / NanoScheduler.UPDATE_PROGRESS_FREQ; } public Map2x makeMap() { return addDelays ? 
new Map2xWithDelays() : new Map2x(); } diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java index 6c17aa78d..4fd875c0e 100644 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java @@ -11,10 +11,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.PriorityBlockingQueue; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.*; /** * UnitTests for Reducer @@ -30,19 +27,17 @@ public class ReducerUnitTest extends BaseTest { List tests = new ArrayList(); for ( final int groupSize : Arrays.asList(-1, 1, 5, 50, 500, 5000, 50000) ) { - for ( final boolean setJobIDAtStart : Arrays.asList(true, false) ) { - for ( final int nElements : Arrays.asList(0, 1, 3, 5) ) { - if ( groupSize < nElements ) { - for ( final List> jobs : Utils.makePermutations(makeJobs(nElements), nElements, false) ) { - tests.add(new Object[]{ new ListOfJobs(jobs), setJobIDAtStart, groupSize }); - } + for ( final int nElements : Arrays.asList(0, 1, 3, 5) ) { + if ( groupSize < nElements ) { + for ( final List> jobs : Utils.makePermutations(makeJobs(nElements), nElements, false) ) { + tests.add(new Object[]{ new ListOfJobs(jobs), groupSize }); } } + } - for ( final int nElements : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) ) { - if ( groupSize < nElements ) { - tests.add(new Object[]{ new ListOfJobs(makeJobs(nElements)), setJobIDAtStart, groupSize }); - } + for ( final int nElements : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) ) { + if ( groupSize < nElements ) { + tests.add(new Object[]{ new ListOfJobs(makeJobs(nElements)), groupSize }); } } } @@ -80,15 
+75,11 @@ public class ReducerUnitTest extends BaseTest { } @Test(enabled = true, dataProvider = "ReducerThreadTest", timeOut = NanoSchedulerUnitTest.NANO_SCHEDULE_MAX_RUNTIME) - public void testReducerThread(final List> jobs, final boolean setJobIDAtStart, final int groupSize) throws Exception { - runTests(jobs, setJobIDAtStart, groupSize); - } - - private void runTests( final List> allJobs, boolean setJobIDAtStart, int groupSize ) throws Exception { + public void testReducerThread(final List> allJobs, int groupSize) throws Exception { if ( groupSize == -1 ) groupSize = allJobs.size(); - final PriorityBlockingQueue> mapResultsQueue = new PriorityBlockingQueue>(); + final MapResultsQueue mapResultsQueue = new MapResultsQueue(); final List>> jobGroups = Utils.groupList(allJobs, groupSize); final ReduceSumTest reduce = new ReduceSumTest(); @@ -98,68 +89,93 @@ public class ReducerUnitTest extends BaseTest { final ExecutorService es = Executors.newSingleThreadExecutor(); es.submit(waitingThread); + int lastJobID = -1; int nJobsSubmitted = 0; int jobGroupCount = 0; final int lastJobGroupCount = jobGroups.size() - 1; - setJobIDAtStart = setJobIDAtStart && groupSize == 1; for ( final List> jobs : jobGroups ) { //logger.warn("Processing job group " + jobGroupCount + " with " + jobs.size() + " jobs"); for ( final MapResult job : jobs ) { - mapResultsQueue.add(job); + lastJobID = Math.max(lastJobID, job.getJobID()); + mapResultsQueue.put(job); nJobsSubmitted++; } if ( jobGroupCount == lastJobGroupCount ) { - mapResultsQueue.add(new MapResult()); + mapResultsQueue.put(new MapResult(lastJobID+1)); nJobsSubmitted++; } - Assert.assertFalse(reducer.latchIsReleased(), "Latch should be closed at the start"); - - if ( jobGroupCount == 0 && setJobIDAtStart ) { - // only can do the setJobID if jobs cannot be submitted out of order - reducer.setTotalJobCount(allJobs.size()); - Assert.assertFalse(reducer.latchIsReleased(), "Latch should be closed even after setting last job if we 
haven't processed anything"); - } - - final int nReduced = reducer.reduceAsMuchAsPossible(mapResultsQueue); + final int nReduced = reducer.reduceAsMuchAsPossible(mapResultsQueue, true); Assert.assertTrue(nReduced <= nJobsSubmitted, "Somehow reduced more jobs than submitted"); - if ( setJobIDAtStart ) { - final boolean submittedLastJob = jobGroupCount == lastJobGroupCount; - Assert.assertEquals(reducer.latchIsReleased(), submittedLastJob, - "When last job is set, latch should only be released if the last job has been submitted"); - } else { - Assert.assertEquals(reducer.latchIsReleased(), false, "When last job isn't set, latch should never be release"); - } - jobGroupCount++; } - if ( setJobIDAtStart ) - Assert.assertTrue(reducer.latchIsReleased(), "Latch should be released after reducing with last job id being set"); - else { - Assert.assertFalse(reducer.latchIsReleased(), "Latch should be closed after reducing without last job id being set"); - reducer.setTotalJobCount(allJobs.size()); - Assert.assertTrue(reducer.latchIsReleased(), "Latch should be released after reducing after setting last job id "); - } - Assert.assertEquals(reduce.nRead, allJobs.size(), "number of read values not all of the values in the reducer queue"); es.shutdown(); es.awaitTermination(1, TimeUnit.HOURS); } - @Test(expectedExceptions = IllegalStateException.class) - private void runSettingJobIDTwice() throws Exception { - final PriorityBlockingQueue> mapResultsQueue = new PriorityBlockingQueue>(); - + @Test(timeOut = 1000, invocationCount = 100) + private void testNonBlockingReduce() throws Exception { final Reducer reducer = new Reducer(new ReduceSumTest(), new MultiThreadedErrorTracker(), 0); + final MapResultsQueue mapResultsQueue = new MapResultsQueue(); + mapResultsQueue.put(new MapResult(0, 0)); + mapResultsQueue.put(new MapResult(1, 1)); - reducer.setTotalJobCount(10); - reducer.setTotalJobCount(15); + final CountDownLatch latch = new CountDownLatch(1); + final ExecutorService es = 
Executors.newSingleThreadExecutor(); + + es.submit(new Runnable() { + @Override + public void run() { + reducer.acquireReduceLock(true); + latch.countDown(); + } + }); + + latch.await(); + final int nReduced = reducer.reduceAsMuchAsPossible(mapResultsQueue, false); + Assert.assertEquals(nReduced, 0, "The reducer lock was already held but we did some work"); + es.shutdown(); + es.awaitTermination(1, TimeUnit.HOURS); } + @Test(timeOut = 10000, invocationCount = 100) + private void testBlockingReduce() throws Exception { + final Reducer reducer = new Reducer(new ReduceSumTest(), new MultiThreadedErrorTracker(), 0); + final MapResultsQueue mapResultsQueue = new MapResultsQueue(); + mapResultsQueue.put(new MapResult(0, 0)); + mapResultsQueue.put(new MapResult(1, 1)); + + final CountDownLatch latch = new CountDownLatch(1); + final ExecutorService es = Executors.newSingleThreadExecutor(); + + es.submit(new Runnable() { + @Override + public void run() { + reducer.acquireReduceLock(true); + latch.countDown(); + try { + Thread.sleep(100); + } catch ( InterruptedException e ) { + ; + } finally { + reducer.releaseReduceLock(); + } + } + }); + + latch.await(); + final int nReduced = reducer.reduceAsMuchAsPossible(mapResultsQueue, true); + Assert.assertEquals(nReduced, 2, "The reducer should have blocked until the lock was freed and reduced 2 values"); + es.shutdown(); + es.awaitTermination(1, TimeUnit.HOURS); + } + + public class ReduceSumTest implements NSReduceFunction { int nRead = 0; int lastValue = -1; @@ -188,12 +204,8 @@ public class ReducerUnitTest extends BaseTest { @Override public void run() { - try { - final int observedSum = reducer.waitForFinalReduce(); - Assert.assertEquals(observedSum, expectedSum, "Reduce didn't sum to expected value"); - } catch ( InterruptedException ex ) { - Assert.fail("Got interrupted"); - } + final int observedSum = reducer.getReduceResult(); + Assert.assertEquals(observedSum, expectedSum, "Reduce didn't sum to expected value"); } } } \ 
No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java index 715acad03..0f0c74362 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java @@ -106,6 +106,11 @@ public class RecalDatumUnitTest extends BaseTest { Assert.assertEquals(datum.getEstimatedQReportedAsByte(), cfg.getReportedQual()); BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalQuality(), cfg.getErrorRatePhredScaled()); BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalErrorRate(), cfg.getErrorRate()); + + final double e = datum.getEmpiricalQuality(); + Assert.assertTrue(datum.getEmpiricalQualityAsByte() >= Math.floor(e)); + Assert.assertTrue(datum.getEmpiricalQualityAsByte() <= Math.ceil(e)); + Assert.assertNotNull(datum.toString()); } @Test(dataProvider = "RecalDatumTestProvider") @@ -145,10 +150,32 @@ public class RecalDatumUnitTest extends BaseTest { cfg.exTotal++; assertBasicFeaturesOfRecalDatum(datum, cfg); + datum = cfg.makeRecalDatum(); + datum.increment(false); + cfg.exTotal++; + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); + datum.incrementNumObservations(2); + cfg.exTotal += 2; + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); + datum.incrementNumMismatches(2); + cfg.exError += 2; + assertBasicFeaturesOfRecalDatum(datum, cfg); + + datum = cfg.makeRecalDatum(); datum.increment(10, 5); cfg.exError += 5; cfg.exTotal += 10; assertBasicFeaturesOfRecalDatum(datum, cfg); } + + @Test + public void testNoObs() { + final RecalDatum rd = new RecalDatum(0, 0, (byte)10); + Assert.assertEquals(rd.getEmpiricalErrorRate(), 0.0); + } } \ No newline at end of file diff --git 
a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java new file mode 100644 index 000000000..500a41e74 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +public final class RecalUtilsUnitTest extends BaseTest { + private class Row { + int rg, qual, ne, no; + + private Row(final Row copy) { + this(copy.rg, copy.qual, copy.ne, copy.no); + } + + private Row(int rg, int qual, int ne, int no) { + this.rg = rg; + this.qual = qual; + this.ne = ne; + this.no = no; + } + + @Override + public String toString() { + return "Row{" + + "" + rg + + ", " + qual + + ", " + ne + + ", " + no + + '}'; + } + } + + @DataProvider(name = "CombineTablesProvider") + public Object[][] createCombineTablesProvider() { + List tests = new ArrayList(); + + final List rows = new ArrayList(); + for ( final int rg : Arrays.asList(0, 1) ) { + for ( final int qual : Arrays.asList(0, 1) ) { + rows.add(new Row(rg, qual, 1, 10)); + } + } + + logger.warn("Number of rows " + rows.size()); + + List> permutations = new LinkedList>(); + permutations.addAll(Utils.makePermutations(rows, 1, false)); + permutations.addAll(Utils.makePermutations(rows, 2, false)); + permutations.addAll(Utils.makePermutations(rows, 3, false)); + + // adding 1 row to 2 + for ( final List table1 : permutations ) { + for ( final Row table2 : rows ) { + tests.add(new Object[]{table1, Arrays.asList(table2)}); + } + } + + // adding 2 rows to 1 + for ( final List table1 : permutations ) { + for ( final Row table2 : rows ) { + tests.add(new Object[]{Arrays.asList(table2), table1}); + } + } + + for ( final List table1 : permutations ) { + for ( final List table2 : permutations ) { + tests.add(new Object[]{table1, table2}); + } + } + + return 
tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "CombineTablesProvider") + public void testCombineTables(final List table1, final List table2) { + final NestedIntegerArray nia1 = makeTable(table1); + final NestedIntegerArray nia2 = makeTable(table2); + final List expectedRows = makeExpected(table1, table2); + final NestedIntegerArray expected = makeTable(expectedRows); + RecalUtils.combineTables(nia1, nia2); + + Assert.assertEquals(nia1.getDimensions(), expected.getDimensions()); + Assert.assertEquals(nia1.getAllValues().size(), expected.getAllValues().size()); + + for ( final NestedIntegerArray.Leaf leaf : expected.getAllLeaves() ) { + final RecalDatum actual = nia1.get(leaf.keys); + Assert.assertEquals(actual.getNumMismatches(), leaf.value.getNumMismatches()); + Assert.assertEquals(actual.getNumObservations(), leaf.value.getNumObservations()); + } + } + + public List makeExpected(final List table1, final List table2) { + final List combined = new LinkedList(); + for ( final Row t1 : table1 ) combined.add(new Row(t1)); + for ( final Row t2 : table2 ) { + combine(combined, t2); + } + return combined; + } + + private void combine(final List combined, final Row row) { + for ( final Row c : combined ) { + if ( c.rg == row.rg && c.qual == row.qual ) { + c.ne += row.ne; + c.no += row.no; + return; + } + } + + combined.add(new Row(row)); + } + + public NestedIntegerArray makeTable(final List rows) { + final NestedIntegerArray x = new NestedIntegerArray(3, 3); + for ( final Row r : rows ) + x.put(new RecalDatum(r.no, r.ne, (byte)10), r.rg, r.qual); + return x; + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java index d597b9f2c..b190f2ff2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java @@ -21,6 +21,14 @@ import java.util.*; * @since 4/21/12 */ public class RecalibrationReportUnitTest { + private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) { + final Random random = new Random(); + final int nObservations = random.nextInt(maxObservations); + final int nErrors = random.nextInt(maxErrors); + final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE); + return new RecalDatum(nObservations, nErrors, (byte)qual); + } + @Test(enabled = false) public void testOutput() { final int length = 100; @@ -86,12 +94,12 @@ public class RecalibrationReportUnitTest { final int[] covariates = rc.getKeySet(offset, errorMode); final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000; - rgTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index); - qualTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index); + rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index); + qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index); nKeys += 2; for (int j = 0; j < optionalCovariates.size(); j++) { final NestedIntegerArray covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j); - covTable.put(RecalDatum.createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], errorMode.index); + covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], j, covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j], errorMode.index); nKeys++; } } diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java 
b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java new file mode 100644 index 000000000..93e52ae83 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.collections.NestedIntegerArray; +import org.broadinstitute.sting.utils.recalibration.covariates.*; +import org.testng.Assert; +import org.testng.annotations.Test; + +public final class RecalibrationTablesUnitTest extends BaseTest { + @Test + public void basicTest() { + final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates(); + final int numReadGroups = 6; + final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups); + + final Covariate qualCov = covariates[1]; + final Covariate cycleCov = covariates[2]; + final Covariate contextCov = covariates[3]; + + Assert.assertEquals(tables.numTables(), covariates.length); + + Assert.assertNotNull(tables.getReadGroupTable()); + Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.index)); + testDimensions(tables.getReadGroupTable(), numReadGroups); + + Assert.assertNotNull(tables.getQualityScoreTable()); + Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index)); + testDimensions(tables.getQualityScoreTable(), numReadGroups, qualCov.maximumKeyValue() + 1); + + Assert.assertNotNull(tables.getTable(2)); + testDimensions(tables.getTable(2), numReadGroups, qualCov.maximumKeyValue() + 1, cycleCov.maximumKeyValue() + 1); + + Assert.assertNotNull(tables.getTable(3)); + testDimensions(tables.getTable(3), numReadGroups, qualCov.maximumKeyValue() + 1, contextCov.maximumKeyValue() + 1); + } + + private void testDimensions(final NestedIntegerArray table, final int ... 
dimensions) { + final int[] dim = new int[dimensions.length+1]; + System.arraycopy(dimensions, 0, dim, 0, dimensions.length); + dim[dimensions.length] = EventType.values().length; + Assert.assertEquals(table.getDimensions().length, dim.length); + + for ( int i = 0; i < dim.length; i++ ) { + Assert.assertEquals(table.getDimensions()[i], dim[i], "Table dimensions not expected at dim " + i); + } + } + + @Test + public void basicMakeQualityScoreTable() { + final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates(); + final int numReadGroups = 6; + final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups); + + final Covariate qualCov = covariates[1]; + final NestedIntegerArray copy = tables.makeQualityScoreTable(); + testDimensions(copy, numReadGroups, qualCov.maximumKeyValue()+1); + Assert.assertEquals(copy.getAllValues().size(), 0); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java new file mode 100644 index 000000000..bf3917e70 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.recalibration; + +import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; +import org.broadinstitute.sting.utils.recalibration.covariates.*; + +/** + * Created with IntelliJ IDEA. + * User: depristo + * Date: 12/23/12 + * Time: 1:06 PM + * To change this template use File | Settings | File Templates. + */ +public class RecalibrationTestUtils { + public static Covariate[] makeInitializedStandardCovariates() { + final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); + final Covariate[] covariates = new Covariate[4]; + covariates[0] = new ReadGroupCovariate(); + covariates[1] = new QualityScoreCovariate(); + covariates[2] = new ContextCovariate(); + covariates[3] = new CycleCovariate(); + for ( Covariate cov : covariates ) cov.initialize(RAC); + return covariates; + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java deleted file mode 100644 index 50fbea708..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the 
Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.variantcontext; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.FileNotFoundException; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLClassLoader; - -/** - * Test to ensure that, given only the VCF jar and its expected dependencies, core VCF classes will load. 
- */ -public class VCFJarClassLoadingUnitTest { - @Test - public void testVCFJarClassLoading() throws ClassNotFoundException, MalformedURLException { - URL[] jarURLs; - - try { - jarURLs = new URL[] { getVCFJarFile().toURI().toURL(), getTribbleJarFile().toURI().toURL() }; - } - catch ( FileNotFoundException e ) { - throw new ReviewedStingException("Could not find the VCF jar and/or its dependencies", e); - } - - ClassLoader classLoader = new URLClassLoader(jarURLs, null); - classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext"); - classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec"); - classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec"); - classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec"); - classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter"); - classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter"); - classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer"); - } - - /** - * Locates the tribble jar within the dist directory. - * - * Makes the horrible assumption that tests will always be run from the root of a Sting clone, - * but this is much less problematic than using the classpath to locate tribble, since - * the classpath won't explicitly contain tribble when we're testing the fully-packaged - * GATK jar. - * - * @return The tribble jar file, if found - * @throws FileNotFoundException If we couldn't locate a tribble jar within the dist directory - */ - private File getTribbleJarFile() throws FileNotFoundException { - File distDir = new File("dist"); - if ( ! 
distDir.isDirectory() ) { - throw new FileNotFoundException("The dist directory does not exist"); - } - - for ( File distDirEntry : distDir.listFiles() ) { - if ( distDirEntry.getName().startsWith("tribble") && distDirEntry.getName().endsWith(".jar") ) { - return distDirEntry; - } - } - - throw new FileNotFoundException("Could not find a tribble jar file in the dist directory."); - } - - /** - * Locates the vcf jar within the dist directory. - * - * Makes the horrible assumption that tests will always be run from the root of a Sting clone, - * but this is much less problematic than using the classpath to locate vcf.jar, since - * the classpath won't explicitly contain vcf.jar when we're testing the fully-packaged - * GATK jar. - * - * @return The vcf jar file, if found - * @throws FileNotFoundException If we couldn't locate a vcf jar within the dist directory - */ - private File getVCFJarFile() throws FileNotFoundException { - File vcfJar = new File("dist/vcf.jar"); - - if ( ! vcfJar.exists() ) { - throw new FileNotFoundException("Could not find dist/vcf.jar"); - } - - return vcfJar; - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/variant/bcf2/BCF2EncoderDecoderUnitTest.java similarity index 99% rename from public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java rename to public/java/test/org/broadinstitute/variant/bcf2/BCF2EncoderDecoderUnitTest.java index 77050c069..5b8062b67 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/bcf2/BCF2EncoderDecoderUnitTest.java @@ -23,7 +23,7 @@ */ // our package -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; // the imports for unit testing. 
@@ -31,7 +31,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.writer.BCF2Encoder; +import org.broadinstitute.variant.variantcontext.writer.BCF2Encoder; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java b/public/java/test/org/broadinstitute/variant/bcf2/BCF2UtilsUnitTest.java similarity index 97% rename from public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java rename to public/java/test/org/broadinstitute/variant/bcf2/BCF2UtilsUnitTest.java index ae76a374a..8a182367d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2UtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/bcf2/BCF2UtilsUnitTest.java @@ -22,17 +22,14 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.variant.bcf2; -import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.variant.vcf.*; -import java.io.*; import java.util.*; import org.testng.Assert; -import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/BaseUtilsUnitTest.java b/public/java/test/org/broadinstitute/variant/utils/BaseUtilsUnitTest.java similarity index 97% rename from public/java/test/org/broadinstitute/sting/utils/BaseUtilsUnitTest.java rename to public/java/test/org/broadinstitute/variant/utils/BaseUtilsUnitTest.java index 2f5aea951..8112f510d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/BaseUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/utils/BaseUtilsUnitTest.java @@ -1,5 +1,6 @@ -package org.broadinstitute.sting.utils; +package org.broadinstitute.variant.utils; +import org.broadinstitute.sting.utils.MathUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/AlleleUnitTest.java similarity index 98% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/AlleleUnitTest.java index 65398c373..3ce335a00 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/AlleleUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/AlleleUnitTest.java @@ -24,14 +24,13 @@ */ // our package -package 
org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; // the imports for unit testing. import org.testng.Assert; import org.testng.annotations.BeforeSuite; -import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; // public Allele(byte[] bases, boolean isRef) { diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeLikelihoodsUnitTest.java similarity index 97% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/GenotypeLikelihoodsUnitTest.java index 4ce32cee7..51e600fa5 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeLikelihoodsUnitTest.java @@ -23,13 +23,14 @@ */ // our package -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; // the imports for unit testing. 
-import org.broadinstitute.sting.utils.BaseUtils; +import org.broad.tribble.TribbleException; +import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.Assert; @@ -70,7 +71,7 @@ public class GenotypeLikelihoodsUnitTest { Assert.assertEquals(gl.getAsString(), vPLString); } - @Test (expectedExceptions = UserException.MalformedVCF.class) + @Test (expectedExceptions = TribbleException.class) public void testErrorBadFormat() { GenotypeLikelihoods gl = GenotypeLikelihoods.fromPLField("adf,b,c"); gl.getAsVector(); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeUnitTest.java similarity index 97% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/GenotypeUnitTest.java index e511a7f30..7be215b2d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypeUnitTest.java @@ -23,7 +23,7 @@ */ // our package -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; // the imports for unit testing. 
@@ -34,9 +34,6 @@ import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; -import java.util.Arrays; -import java.util.List; - public class GenotypeUnitTest extends BaseTest { Allele A, Aref, T; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypesContextUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypesContextUnitTest.java similarity index 99% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypesContextUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/GenotypesContextUnitTest.java index 6ccfd87ce..04af64bd3 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypesContextUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/GenotypesContextUnitTest.java @@ -23,7 +23,7 @@ */ // our package -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; // the imports for unit testing. 
@@ -33,7 +33,6 @@ import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; import org.testng.Assert; -import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextBenchmark.java similarity index 76% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java rename to public/java/test/org/broadinstitute/variant/variantcontext/VariantContextBenchmark.java index 0e5522e3a..bbf6976e7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextBenchmark.java @@ -22,21 +22,16 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import com.google.caliper.Param; import com.google.caliper.SimpleBenchmark; import com.google.caliper.runner.CaliperMain; -import net.sf.picard.reference.ReferenceSequenceFile; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; -import org.broad.tribble.readers.AsciiLineReader; -import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.variant.vcf.VCFCodec; -import java.io.*; import java.util.*; /** @@ -235,7 +230,7 @@ public class VariantContextBenchmark extends SimpleBenchmark { toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make()); } - VariantContextUtils.simpleMerge(b37GenomeLocParser, toMerge, null, + VariantContextUtils.simpleMerge(toMerge, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, true, false, "set", false, true); @@ -258,54 +253,54 @@ public class VariantContextBenchmark extends SimpleBenchmark { // public void timeV13(int rep) { // for ( int i = 0; i < rep; i++ ) { -// FunctionToBenchmark func = getV13FunctionToBenchmark(); -// FeatureCodec codec = new org.broadinstitute.sting.utils.variantcontext.v13.VCFCodec(); +// FunctionToBenchmark func = getV13FunctionToBenchmark(); +// FeatureCodec codec = new org.broadinstitute.variant.variantcontext.v13.VCFCodec(); // runBenchmark(codec, func); // } // } // -// public FunctionToBenchmark getV13FunctionToBenchmark() { +// public FunctionToBenchmark getV13FunctionToBenchmark() { // switch ( operation ) { // case READ: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// 
public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // ; // empty operation // } // }; // case SUBSET_TO_SAMPLES: -// return new FunctionToBenchmark() { +// return new FunctionToBenchmark() { // List samples; -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // if ( samples == null ) // samples = new ArrayList(vc.getSampleNames()).subList(0, nSamplesToTake); -// org.broadinstitute.sting.utils.variantcontext.v13.VariantContext sub = vc.subContextFromGenotypes(vc.getGenotypes(samples).values()); +// org.broadinstitute.variant.variantcontext.v13.VariantContext sub = vc.subContextFromGenotypes(vc.getGenotypes(samples).values()); // sub.getNSamples(); // } // }; // // case GET_TYPE: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // vc.getType(); // } // }; // case GET_ID: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // vc.getID(); // } // }; // case GET_GENOTYPES: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // vc.getGenotypes().size(); // } // }; // // case GET_GENOTYPES_FOR_SAMPLES: -// return new FunctionToBenchmark() { +// return new FunctionToBenchmark() { // Set samples; -// public void run(final 
org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // if ( samples == null ) // samples = new HashSet(new ArrayList(vc.getSampleNames()).subList(0, nSamplesToTake)); // vc.getGenotypes(samples).size(); @@ -313,60 +308,60 @@ public class VariantContextBenchmark extends SimpleBenchmark { // }; // // case GET_ATTRIBUTE_STRING: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // vc.getExtendedAttribute("AN", null); // } // }; // // case GET_ATTRIBUTE_INT: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // vc.getAttributeAsInt("AC", 0); // } // }; // // case GET_N_SAMPLES: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // vc.getNSamples(); // } // }; // // case GET_GENOTYPES_IN_ORDER_OF_NAME: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // ; // TODO - TEST IS BROKEN // //vc.getGenotypesOrderedByName(); // } // }; // // case CALC_GENOTYPE_COUNTS: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { +// return new 
FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { // vc.getHetCount(); // } // }; // // case MERGE: -// return new FunctionToBenchmark() { -// public void run(final org.broadinstitute.sting.utils.variantcontext.v13.VariantContext vc) { -// List toMerge = new ArrayList(); +// return new FunctionToBenchmark() { +// public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) { +// List toMerge = new ArrayList(); // // for ( int i = 0; i < dupsToMerge; i++ ) { -// Map gc = new HashMap(); -// for ( final org.broadinstitute.sting.utils.variantcontext.v13.Genotype g : vc.getGenotypes().values() ) { +// Map gc = new HashMap(); +// for ( final org.broadinstitute.variant.variantcontext.v13.Genotype g : vc.getGenotypes().values() ) { // String name = g.getSampleName()+"_"+i; -// gc.put(name, new org.broadinstitute.sting.utils.variantcontext.v13.Genotype(name, +// gc.put(name, new org.broadinstitute.variant.variantcontext.v13.Genotype(name, // g.getAlleles(), g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased(), g.getLikelihoods().getAsVector())); -// toMerge.add(org.broadinstitute.sting.utils.variantcontext.v13.VariantContext.modifyGenotypes(vc, gc)); +// toMerge.add(org.broadinstitute.variant.variantcontext.v13.VariantContext.modifyGenotypes(vc, gc)); // } // } // -// org.broadinstitute.sting.utils.variantcontext.v13.VariantContextUtils.simpleMerge(b37GenomeLocParser, +// org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.simpleMerge(b37GenomeLocParser, // toMerge, null, -// org.broadinstitute.sting.utils.variantcontext.v13.VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, -// org.broadinstitute.sting.utils.variantcontext.v13.VariantContextUtils.GenotypeMergeType.UNSORTED, +// org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, +// 
org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.GenotypeMergeType.UNSORTED, // true, false, "set", false, true); // } // }; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextTestProvider.java similarity index 99% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java rename to public/java/test/org/broadinstitute/variant/variantcontext/VariantContextTestProvider.java index c57b2a44d..a88a76947 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextTestProvider.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import org.apache.log4j.Logger; import org.broad.tribble.FeatureCodec; @@ -30,11 +30,11 @@ import org.broad.tribble.FeatureCodecHeader; import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; -import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.variant.bcf2.BCF2Codec; +import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.variantcontext.writer.Options; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.Options; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.testng.Assert; import java.io.File; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java 
b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUnitTest.java similarity index 96% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUnitTest.java index 19620b8df..06c1bc771 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUnitTest.java @@ -1,21 +1,19 @@ // our package -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; // the imports for unit testing. import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.testng.annotations.BeforeSuite; -import org.testng.annotations.BeforeTest; import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import org.testng.Assert; -import java.lang.reflect.Array; import java.util.*; @@ -478,30 +476,30 @@ public class VariantContextUnitTest extends BaseTest { Pair,byte[]> result; byte[] refBytes = "TATCATCATCGGA".getBytes(); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions( "ATG".getBytes(), "ATGATGATGATG".getBytes()),4); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions( "G".getBytes(), "ATGATGATGATG".getBytes()),0); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions( "T".getBytes(), "T".getBytes()),1); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions( "AT".getBytes(), "ATGATGATCATG".getBytes()),1); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions( "CCC".getBytes(), "CCCCCCCC".getBytes()),2); + 
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes()),4); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes()),0); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes()),1); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes()),1); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes()),2); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); 
Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],4); Assert.assertEquals(result.getSecond().length,3); // ATC*,A,ATCATC vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); Assert.assertEquals(result.getFirst().toArray()[0],3); Assert.assertEquals(result.getFirst().toArray()[1],2); Assert.assertEquals(result.getFirst().toArray()[2],4); @@ -510,7 +508,7 @@ public class VariantContextUnitTest extends BaseTest { // simple non-tandem deletion: CCCC*, - refBytes = "TCCCCCCCCATG".getBytes(); vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); Assert.assertEquals(result.getFirst().toArray()[0],8); Assert.assertEquals(result.getFirst().toArray()[1],4); Assert.assertEquals(result.getSecond().length,1); @@ -518,7 +516,7 @@ public class VariantContextUnitTest extends BaseTest { // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9 refBytes = "TCCCCCCCAGAGAGAG".getBytes(); vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); Assert.assertEquals(result.getFirst().toArray()[0],7); Assert.assertEquals(result.getFirst().toArray()[1],5); Assert.assertEquals(result.getFirst().toArray()[2],3); @@ -528,7 +526,7 @@ public class VariantContextUnitTest extends BaseTest { // GAGA*,-,GAGAGAGA refBytes = "TGAGAGAGAGATTT".getBytes(); vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, 
Arrays.asList(gagaR, nullA,gagagaga)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc,refBytes); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); Assert.assertEquals(result.getFirst().toArray()[0],5); Assert.assertEquals(result.getFirst().toArray()[1],3); Assert.assertEquals(result.getFirst().toArray()[2],7); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUtilsUnitTest.java similarity index 98% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUtilsUnitTest.java index 3ad438b26..f68d24670 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUtilsUnitTest.java @@ -21,7 +21,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; @@ -29,6 +29,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; @@ -189,7 +190,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { final List priority = vcs2priority(inputs); - final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + final VariantContext merged = VariantContextUtils.simpleMerge( inputs, priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); @@ -247,7 +248,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { inputs.add(new VariantContextBuilder(snpVC1).id(id).make()); } - final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + final VariantContext merged = VariantContextUtils.simpleMerge( inputs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); @@ -364,7 +365,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "mergeFiltered") public void testMergeFiltered(MergeFilteredTest cfg) { final List priority = vcs2priority(cfg.inputs); - final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + final VariantContext merged = VariantContextUtils.simpleMerge( cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, 
true, false, "set", false, false); // test alleles are equal @@ -490,7 +491,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "mergeGenotypes") public void testMergeGenotypes(MergeGenotypesTest cfg) { - final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + final VariantContext merged = VariantContextUtils.simpleMerge( cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); @@ -531,7 +532,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, -1)); final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2)); - final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + final VariantContext merged = VariantContextUtils.simpleMerge( Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); @@ -539,12 +540,12 @@ public class VariantContextUtilsUnitTest extends BaseTest { Assert.assertEquals(merged.getSampleNames(), new HashSet(Arrays.asList("s1.1", "s1.2"))); } - @Test(expectedExceptions = UserException.class) + @Test(expectedExceptions = IllegalStateException.class) public void testMergeGenotypesRequireUnique() { final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, -1)); final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2)); - final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + final VariantContext merged = VariantContextUtils.simpleMerge( Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false); } 
@@ -563,7 +564,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); - final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser, + final VariantContext merged = VariantContextUtils.simpleMerge( Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); @@ -654,7 +655,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { public void testRepeatDetectorTest(RepeatDetectorTest cfg) { // test alleles are equal - Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); + Assert.assertEquals(GATKVariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); } // -------------------------------------------------------------------------------- @@ -700,7 +701,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "ReverseClippingPositionTestProvider") public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { - int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); + int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); Assert.assertEquals(result, cfg.expectedClip); } @@ -778,7 +779,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { @Test(dataProvider = "SplitBiallelics") public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List expectedBiallelics) { - final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc); + final List biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vc); Assert.assertEquals(biallelics.size(), 
expectedBiallelics.size()); for ( int i = 0; i < biallelics.size(); i++ ) { final VariantContext actual = biallelics.get(i); @@ -799,7 +800,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make(); - final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes); + final List biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes); for ( int i = 0; i < biallelics.size(); i++ ) { final VariantContext actual = biallelics.get(i); Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/VariantJEXLContextUnitTest.java similarity index 97% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/VariantJEXLContextUnitTest.java index 8f03f1d38..c29b87aad 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContextUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/VariantJEXLContextUnitTest.java @@ -21,10 +21,9 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext; +package org.broadinstitute.variant.variantcontext; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java similarity index 95% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java index 5876efa12..048dee245 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broad.tribble.AbstractFeatureReader; @@ -31,14 +31,14 @@ import org.broad.tribble.Tribble; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderVersion; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.broadinstitute.variant.vcf.VCFHeaderLine; +import org.broadinstitute.variant.vcf.VCFHeaderVersion; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.variant.variantcontext.*; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWritersUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/writer/VariantContextWritersUnitTest.java similarity index 93% rename from public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWritersUnitTest.java rename to public/java/test/org/broadinstitute/variant/variantcontext/writer/VariantContextWritersUnitTest.java index adf3eb235..03087051a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VariantContextWritersUnitTest.java +++ 
b/public/java/test/org/broadinstitute/variant/variantcontext/writer/VariantContextWritersUnitTest.java @@ -23,7 +23,7 @@ */ // our package -package org.broadinstitute.sting.utils.variantcontext.writer; +package org.broadinstitute.variant.variantcontext.writer; // the imports for unit testing. @@ -33,12 +33,12 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.FeatureCodec; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.variant.bcf2.BCF2Codec; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextTestProvider; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/variant/vcf/IndexFactoryUnitTest.java similarity index 88% rename from public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java rename to public/java/test/org/broadinstitute/variant/vcf/IndexFactoryUnitTest.java index 41ea587be..b51927e0a 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/vcf/IndexFactoryUnitTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package 
org.broadinstitute.variant.vcf; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.AbstractFeatureReader; @@ -9,10 +9,10 @@ import org.broad.tribble.index.IndexFactory; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.writer.Options; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; -import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.writer.Options; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/variant/vcf/VCFHeaderUnitTest.java similarity index 99% rename from public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java rename to public/java/test/org/broadinstitute/variant/vcf/VCFHeaderUnitTest.java index 62d584ef6..2ff6ebbbf 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/vcf/VCFHeaderUnitTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java 
b/public/java/test/org/broadinstitute/variant/vcf/VCFIntegrationTest.java similarity index 97% rename from public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java rename to public/java/test/org/broadinstitute/variant/vcf/VCFIntegrationTest.java index b9ce58992..1f8e5d563 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/variant/vcf/VCFIntegrationTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -97,7 +97,7 @@ public class VCFIntegrationTest extends WalkerTest { @Test public void testFailingOnVCFWithoutHeaders() { - runVCFWithoutHeaders("", "", UserException.class, false); + runVCFWithoutHeaders("", "", IllegalStateException.class, false); } @Test diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLinesUnitTest.java b/public/java/test/org/broadinstitute/variant/vcf/VCFStandardHeaderLinesUnitTest.java similarity index 99% rename from public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLinesUnitTest.java rename to public/java/test/org/broadinstitute/variant/vcf/VCFStandardHeaderLinesUnitTest.java index b6e6c4f42..4e22603c0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLinesUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/vcf/VCFStandardHeaderLinesUnitTest.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.utils.codecs.vcf; +package org.broadinstitute.variant.vcf; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.DataProvider; diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml index d0b4a52b5..27d2afa47 100644 --- a/public/packages/GATKEngine.xml +++ b/public/packages/GATKEngine.xml @@ -30,6 +30,8 @@ + + diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index dc6cae197..3bd7514f2 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -44,7 +44,7 @@ class GATKResourcesBundle extends QScript { var exampleFASTA: Reference = _ var refs: List[Reference] = _ - class Resource(val file: File, val name: String, val ref: Reference, val useName: Boolean = true, val makeSites: Boolean = true ) { + class Resource(val file: File, val name: String, val ref: Reference, val useName: Boolean = true, val makeSites: Boolean = true, val makeCallsIfBam: Boolean = true ) { def destname(target: Reference): String = { if ( useName ) return name + "." + target.name + "." 
+ getExtension(file) @@ -68,6 +68,7 @@ class GATKResourcesBundle extends QScript { def isVCF(file: File) = file.getName.endsWith(".vcf") def isBAM(file: File) = file.getName.endsWith(".bam") + def isOUT(file: File) = file.getName.endsWith(".out") def isFASTA(file: File) = file.getName.endsWith(".fasta") var RESOURCES: List[Resource] = Nil @@ -94,7 +95,7 @@ class GATKResourcesBundle extends QScript { addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false)) addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false, false, false)) } def initializeStandardDataFiles() = { @@ -172,7 +173,7 @@ class GATKResourcesBundle extends QScript { // exampleFASTA file // addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false, false, false)) } def createBundleDirectories(dir: File) = { @@ -184,6 +185,15 @@ class GATKResourcesBundle extends QScript { } } + def createCurrentLink(bundleDir: File) = { + + val currentLink = new File(BUNDLE_ROOT + "/current") + + if ( currentLink.exists ) currentLink.delete() + + add(new linkFile(bundleDir, currentLink)) + } + def script = { if ( TEST ) initializeTestDataFiles(); @@ -201,8 +211,10 @@ class GATKResourcesBundle extends QScript { } else if ( isBAM(resource.file) ) { val f = copyBundleFile(resource, resource.ref) add(new IndexBAM(f)) - @Output val outvcf: File = swapExt(f.getParent, f, ".bam", ".vcf") - add(new UG(resource.file, resource.ref.file, outvcf)) + if ( resource.makeCallsIfBam ) { + @Output val outvcf: File = swapExt(f.getParent, f, ".bam", ".vcf") + add(new UG(resource.file, 
resource.ref.file, outvcf)) + } } else if ( isVCF(resource.file) ) { for ( destRef <- refs ) { val out = destFile(BUNDLE_DIR, destRef, resource.destname(destRef)) @@ -240,6 +252,9 @@ class GATKResourcesBundle extends QScript { //throw new ReviewedStingException("Unknown file type: " + resource) } } + + createCurrentLink(BUNDLE_DIR) + } else { createBundleDirectories(DOWNLOAD_DIR) createDownloadsFromBundle(BUNDLE_DIR, DOWNLOAD_DIR) @@ -249,7 +264,6 @@ class GATKResourcesBundle extends QScript { def createDownloadsFromBundle(in: File, out: File) { Console.printf("Visiting %s%n", in) - // todo -- ignore some of the other files too (e.g. *.out); will test next time we make a bundle if (! in.getName.startsWith(".")) { if ( in.isDirectory ) { out.mkdirs @@ -261,7 +275,7 @@ class GATKResourcesBundle extends QScript { if ( isBAM(in) ) { add(new cpFile(in, out)) add(new md5sum(out)) - } else { + } else if ( !isOUT(in) ) { add(new GzipFile(in, out + ".gz")) add(new md5sum(out + ".gz")) } @@ -299,6 +313,10 @@ class GATKResourcesBundle extends QScript { def commandLine = "cp %s %s".format(in.getAbsolutePath, out.getAbsolutePath) } + class linkFile(@Input val in: File, @Output val out: File) extends CommandLineFunction { + def commandLine = "ln -s %s %s".format(in.getAbsolutePath, out.getAbsolutePath) + } + class md5sum(@Input val in: File) extends CommandLineFunction { @Output val o: File = new File(in.getAbsolutePath + ".md5") def commandLine = "md5sum %s > %s".format(in.getAbsolutePath, o) diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index 637174557..f899af86d 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -110,95 +110,103 @@ class QCommandLine extends CommandLineProgram with Logging { * functions, and then builds and runs a QGraph based on the dependencies. 
*/ def execute = { - ClassFieldCache.parsingEngine = this.parser + var success = false + var result = 1 + try { + ClassFieldCache.parsingEngine = this.parser - if (settings.qSettings.runName == null) - settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName) - if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory)) - settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp") - qGraph.initializeWithSettings(settings) + if (settings.qSettings.runName == null) + settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName) + if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory)) + settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp") + qGraph.initializeWithSettings(settings) - for (commandPlugin <- allCommandPlugins) { - loadArgumentsIntoObject(commandPlugin) - } - - for (commandPlugin <- allCommandPlugins) { - if (commandPlugin.statusMessenger != null) - commandPlugin.statusMessenger.started() - } - - qGraph.messengers = allCommandPlugins.filter(_.statusMessenger != null).map(_.statusMessenger).toSeq - - // TODO: Default command plugin argument? - val remoteFileConverter = ( - for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null)) - yield commandPlugin.remoteFileConverter - ).headOption.getOrElse(null) - - if (remoteFileConverter != null) - loadArgumentsIntoObject(remoteFileConverter) - - val allQScripts = qScriptPluginManager.createAllTypes() - for (script <- allQScripts) { - logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) - loadArgumentsIntoObject(script) - allCommandPlugins.foreach(_.initScript(script)) - // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now. 
- //if (settings.run) - script.pullInputs() - script.qSettings = settings.qSettings - try { - script.script() - } catch { - case e: Exception => - throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e) + for (commandPlugin <- allCommandPlugins) { + loadArgumentsIntoObject(commandPlugin) } - if (remoteFileConverter != null) { - if (remoteFileConverter.convertToRemoteEnabled) - script.mkRemoteOutputs(remoteFileConverter) - } - - script.functions.foreach(qGraph.add(_)) - logger.info("Added " + script.functions.size + " functions") - } - // Execute the job graph - qGraph.run() - - val functionsAndStatus = qGraph.getFunctionsAndStatus - val success = qGraph.success - - // walk over each script, calling onExecutionDone - for (script <- allQScripts) { - val scriptFunctions = functionsAndStatus.filterKeys(f => script.functions.contains(f)) - script.onExecutionDone(scriptFunctions, success) - } - - logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", functionsAndStatus.size)) - - // write the final complete job report - logger.info("Writing final jobs report...") - qGraph.writeJobsReport() - - if (!success) { - logger.info("Done with errors") - qGraph.logFailed() - for (commandPlugin <- allCommandPlugins) + for (commandPlugin <- allCommandPlugins) { if (commandPlugin.statusMessenger != null) - commandPlugin.statusMessenger.exit("Done with errors: %s".format(qGraph.formattedStatusCounts)) - 1 - } else { - if (settings.run) { - allQScripts.foreach(_.pushOutputs()) - for (commandPlugin <- allCommandPlugins) - if (commandPlugin.statusMessenger != null) { - val allInputs = allQScripts.map(_.remoteInputs) - val allOutputs = allQScripts.map(_.remoteOutputs) - commandPlugin.statusMessenger.done(allInputs, allOutputs) - } + commandPlugin.statusMessenger.started() + } + + qGraph.messengers = allCommandPlugins.filter(_.statusMessenger != 
null).map(_.statusMessenger).toSeq + + // TODO: Default command plugin argument? + val remoteFileConverter = ( + for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null)) + yield commandPlugin.remoteFileConverter + ).headOption.getOrElse(null) + + if (remoteFileConverter != null) + loadArgumentsIntoObject(remoteFileConverter) + + val allQScripts = qScriptPluginManager.createAllTypes() + for (script <- allQScripts) { + logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript]))) + loadArgumentsIntoObject(script) + allCommandPlugins.foreach(_.initScript(script)) + // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now. + //if (settings.run) + script.pullInputs() + script.qSettings = settings.qSettings + try { + script.script() + } catch { + case e: Exception => + throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e) + } + + if (remoteFileConverter != null) { + if (remoteFileConverter.convertToRemoteEnabled) + script.mkRemoteOutputs(remoteFileConverter) + } + + script.functions.foreach(qGraph.add(_)) + logger.info("Added " + script.functions.size + " functions") + } + // Execute the job graph + qGraph.run() + + val functionsAndStatus = qGraph.getFunctionsAndStatus + + // walk over each script, calling onExecutionDone with the graph's outcome (the local `success` flag is still false at this point, so it must not be used here) + for (script <- allQScripts) { + val scriptFunctions = functionsAndStatus.filterKeys(f => script.functions.contains(f)) + script.onExecutionDone(scriptFunctions, qGraph.success) + } + + logger.info("Script %s with %d total jobs".format(if (qGraph.success) "completed successfully" else "failed", functionsAndStatus.size)) + + // write the final complete job report + logger.info("Writing final jobs report...") + qGraph.writeJobsReport() + + if (qGraph.success) { + if (settings.run) { + allQScripts.foreach(_.pushOutputs()) + for
(commandPlugin <- allCommandPlugins) + if (commandPlugin.statusMessenger != null) { + val allInputs = allQScripts.map(_.remoteInputs) + val allOutputs = allQScripts.map(_.remoteOutputs) + commandPlugin.statusMessenger.done(allInputs, allOutputs) + } + } + success = true + result = 0 + } + } finally { + if (!success) { + logger.info("Done with errors") + qGraph.logFailed() + if (settings.run) { + for (commandPlugin <- allCommandPlugins) + if (commandPlugin.statusMessenger != null) + commandPlugin.statusMessenger.exit("Done with errors: %s".format(qGraph.formattedStatusCounts)) + } } - 0 } + result } /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala index eb8be183a..d709d1fb4 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala @@ -124,7 +124,7 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon } /** - * Pull all remote files to the local disk. + * Pull all remote files to the local disk */ def pullInputs() { val inputs = ClassFieldCache.getFieldFiles(this, inputFields) @@ -135,7 +135,7 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon } /** - * Push all remote files from the local disk. 
+ * Push all remote files from the local disk */ def pushOutputs() { val outputs = ClassFieldCache.getFieldFiles(this, outputFields) @@ -145,28 +145,17 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon } } - /** - * List out the remote outputs - * @return the RemoteFile outputs by argument source - */ - def remoteInputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(inputFields)) - - /** - * List out the remote outputs - * @return the RemoteFile outputs by argument source - */ - def remoteOutputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(outputFields)) - - private def tagMap(remoteFieldMap: Map[ArgumentSource, Seq[RemoteFile]]): Map[String, Seq[RemoteFile]] = { - remoteFieldMap.collect{ case (k, v) => ClassFieldCache.fullName(k) -> v }.toMap - } - - private def remoteFieldMap(fields: Seq[ArgumentSource]): Map[ArgumentSource, Seq[RemoteFile]] = { - fields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap - } - private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] = fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile]) + /** + * @return the inputs or null if there are no inputs + */ + def remoteInputs: AnyRef = null + + /** + * @return the outputs or null if there are no outputs + */ + def remoteOutputs: AnyRef = null /** The complete list of fields. 
*/ def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.getClass) diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala index a1133b944..a69f68b8e 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala @@ -7,7 +7,7 @@ import org.broadinstitute.sting.queue.util.RemoteFile */ trait QStatusMessenger { def started() - def done(inputs: Seq[Map[String, Seq[RemoteFile]]], outputs: Seq[Map[String, Seq[RemoteFile]]]) + def done(inputs: Seq[_], outputs: Seq[_]) def exit(message: String) def started(job: String) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala b/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala index 3fe867981..b627114cf 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala @@ -4,10 +4,10 @@ import java.io.File import org.apache.commons.io.FilenameUtils import scala.io.Source._ import net.sf.samtools.SAMFileReader -import org.broadinstitute.sting.utils.codecs.vcf.{VCFHeader, VCFCodec} +import org.broadinstitute.variant.vcf.{VCFHeader, VCFCodec} import scala.collection.JavaConversions._ import org.broad.tribble.{FeatureCodec, AbstractFeatureReader} -import org.broadinstitute.sting.utils.variantcontext.VariantContext +import org.broadinstitute.variant.variantcontext.VariantContext object VCF_BAM_utilities {