diff --git a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R index aff783b8e..d5ee3626f 100644 --- a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R +++ b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R @@ -12,7 +12,7 @@ if ( onCMDLine ) { inputFileName = args[1] outputPDF = args[2] } else { - inputFileName = "Q-8271@gsa2.jobreport.txt" + inputFileName = "Q-26618@gsa4.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" outputPDF = NA @@ -129,9 +129,11 @@ plotGroup <- function(groupTable) { # as above, but averaging over all iterations groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") if ( dim(sub)[1] > 1 ) { - sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) - textplot(as.data.frame(sum), show.rownames=F) - title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + try({ # need a try here because we will fail to reduce when there's just a single iteration + sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) + textplot(as.data.frame(sum), show.rownames=F) + title(paste("Job summary for", name, "averaging over all iterations"), cex=3) + }, silent=T) } } @@ -193,6 +195,7 @@ plotJobsGantt(gatkReportData, F, F) plotProgressByTime(gatkReportData) plotTimeByHost(gatkReportData) for ( group in gatkReportData ) { + print(group) plotGroup(group) } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 2729941bc..2e243b847 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -64,6 +64,9 @@ import java.util.concurrent.*; public class SAMDataSource { final private static GATKSamRecordFactory factory = new GATKSamRecordFactory(); + /** If true, we will load SAMReaders in parallel */ + final private static boolean USE_PARALLEL_LOADING = false; + /** Backing support for reads. */ protected final ReadProperties readProperties; @@ -97,11 +100,6 @@ public class SAMDataSource { */ private final Map readerPositions = new HashMap(); - /** - * Cached representation of the merged header used to generate a merging iterator. - */ - private final SamFileHeaderMerger headerMerger; - /** * The merged header. */ @@ -297,9 +295,8 @@ public class SAMDataSource { initializeReaderPositions(readers); - headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); - mergedHeader = headerMerger.getMergedHeader(); - hasReadGroupCollisions = headerMerger.hasReadGroupCollisions(); + mergedHeader = readers.getMergedHeader(); + hasReadGroupCollisions = readers.hasReadGroupCollisions(); readProperties = new ReadProperties( samFiles, @@ -324,9 +321,9 @@ public class SAMDataSource { List readGroups = reader.getFileHeader().getReadGroups(); for(SAMReadGroupRecord readGroup: readGroups) { - if(headerMerger.hasReadGroupCollisions()) { - mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId())); - mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId()); + if(hasReadGroupCollisions) { + mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId())); + mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId()); } else { mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId()); @@ -559,7 +556,7 @@ public class SAMDataSource { */ private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) { // Set up merging to dynamically merge together multiple BAMs. - MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true); + MergingSamRecordIterator mergingIterator = readers.createMergingIterator(); for(SAMReaderID id: getReaderIDs()) { CloseableIterator iterator = null; @@ -704,6 +701,11 @@ public class SAMDataSource { * A collection of readers derived from a reads metadata structure. */ private class SAMReaders implements Iterable { + /** + * Cached representation of the merged header used to generate a merging iterator. + */ + private final SamFileHeaderMerger headerMerger; + /** * Internal storage for a map of id -> reader. */ @@ -720,68 +722,133 @@ public class SAMDataSource { * @param validationStringency validation stringency. */ public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { - final int N_THREADS = 8; - int totalNumberOfFiles = readerIDs.size(); + final int totalNumberOfFiles = readerIDs.size(); int readerNumber = 1; + final SimpleTimer timer = new SimpleTimer().start(); - ExecutorService executor = Executors.newFixedThreadPool(N_THREADS); - final List inits = new ArrayList(totalNumberOfFiles); - Queue> futures = new LinkedList>(); - for (SAMReaderID readerID: readerIDs) { - logger.debug("Enqueuing for initialization: " + readerID.samFile); - final ReaderInitializer init = new ReaderInitializer(readerID); - inits.add(init); - futures.add(executor.submit(init)); - } - - final SimpleTimer timer = new SimpleTimer(); - try { - final int MAX_WAIT = 30 * 1000; - final int MIN_WAIT = 1 * 1000; - - timer.start(); - while ( ! futures.isEmpty() ) { - final int prevSize = futures.size(); - final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file - final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT)); - Thread.sleep(waitTimeInMS); - - Queue> pending = new LinkedList>(); - for ( final Future initFuture : futures ) { - if ( initFuture.isDone() ) { - final ReaderInitializer init = initFuture.get(); - if (threadAllocation.getNumIOThreads() > 0) { - inputStreams.put(init.readerID, init.blockInputStream); // get from initializer - } - logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID)); - readers.put(init.readerID, init.reader); - } else { - pending.add(initFuture); - } + if ( totalNumberOfFiles > 0 ) logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial")); + if ( ! USE_PARALLEL_LOADING ) { + final int tickSize = 50; + int nExecutedTotal = 0; + long lastTick = timer.currentTime(); + for(final SAMReaderID readerID: readerIDs) { + final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (threadAllocation.getNumIOThreads() > 0) { + inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } - final int pendingSize = pending.size(); - final int nExecutedInTick = prevSize - pendingSize; - final int nExecutedTotal = totalNumberOfFiles - pendingSize; - final double totalTimeInSeconds = timer.getElapsedTime(); - final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds); - final int nRemaining = pendingSize; - final double estTimeToComplete = pendingSize / nTasksPerSecond; - logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m", - nExecutedInTick, (int)(waitTimeInMS / 1000.0), - nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, - nRemaining, estTimeToComplete, estTimeToComplete / 60)); - - futures = pending; + logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile)); + readers.put(init.readerID,init.reader); + if ( ++nExecutedTotal % tickSize == 0) { + double tickInSec = (timer.currentTime() - lastTick) / 1000.0; + printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInSec); + lastTick = timer.currentTime(); + } } - } catch ( InterruptedException e ) { - throw new ReviewedStingException("Interrupted SAMReader initialization", e); - } catch ( ExecutionException e ) { - throw new ReviewedStingException("Execution exception during SAMReader initialization", e); + } else { + final int N_THREADS = 8; + + final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS); + final List inits = new ArrayList(totalNumberOfFiles); + Queue> futures = new LinkedList>(); + for (final SAMReaderID readerID: readerIDs) { + logger.debug("Enqueuing for initialization: " + readerID.samFile); + final ReaderInitializer init = new ReaderInitializer(readerID); + inits.add(init); + futures.add(executor.submit(init)); + } + + try { + final int MAX_WAIT = 30 * 1000; + final int MIN_WAIT = 1 * 1000; + + while ( ! futures.isEmpty() ) { + final int prevSize = futures.size(); + final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file + final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT)); + Thread.sleep(waitTimeInMS); + + Queue> pending = new LinkedList>(); + for ( final Future initFuture : futures ) { + if ( initFuture.isDone() ) { + final ReaderInitializer init = initFuture.get(); + if (threadAllocation.getNumIOThreads() > 0) { + inputStreams.put(init.readerID, init.blockInputStream); // get from initializer + } + logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID)); + readers.put(init.readerID, init.reader); + } else { + pending.add(initFuture); + } + } + + final int nExecutedTotal = totalNumberOfFiles - pending.size(); + final int nExecutedInTick = prevSize - pending.size(); + printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, waitTimeInMS / 1000.0); + futures = pending; + } + } catch ( InterruptedException e ) { + throw new ReviewedStingException("Interrupted SAMReader initialization", e); + } catch ( ExecutionException e ) { + throw new ReviewedStingException("Execution exception during SAMReader initialization", e); + } + + executor.shutdown(); } - logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); - executor.shutdown(); + if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); + + Collection headers = new LinkedList(); + for(SAMFileReader reader: readers.values()) + headers.add(reader.getFileHeader()); + headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true); + } + + final private void printReaderPerformance(final int nExecutedTotal, + final int nExecutedInTick, + final int totalNumberOfFiles, + final SimpleTimer timer, + final double tickDurationInSec) { + final int pendingSize = totalNumberOfFiles - nExecutedTotal; + final double totalTimeInSeconds = timer.getElapsedTime(); + final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds); + final int nRemaining = pendingSize; + final double estTimeToComplete = pendingSize / nTasksPerSecond; + logger.info(String.format("Init %d BAMs in last %.2f s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m", + nExecutedInTick, tickDurationInSec, + nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond, + nRemaining, estTimeToComplete, estTimeToComplete / 60)); + } + + /** + * Return the header derived from the merging of these BAM files. + * @return the merged header. + */ + public SAMFileHeader getMergedHeader() { + return headerMerger.getMergedHeader(); + } + + /** + * Do multiple read groups collide in this dataset? + * @return True if multiple read groups collide; false otherwis. + */ + public boolean hasReadGroupCollisions() { + return headerMerger.hasReadGroupCollisions(); + } + + /** + * Get the newly mapped read group ID for the given read group. + * @param readerID Reader for which to discern the transformed ID. + * @param originalReadGroupID Original read group. + * @return Remapped read group. + */ + public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) { + SAMFileHeader header = readers.get(readerID).getFileHeader(); + return headerMerger.getReadGroupId(header,originalReadGroupID); + } + + public MergingSamRecordIterator createMergingIterator() { + return new MergingSamRecordIterator(headerMerger,readers.values(),true); } /** @@ -833,25 +900,6 @@ public class SAMDataSource { public boolean isEmpty() { return readers.isEmpty(); } - - /** - * Gets all the actual readers out of this data structure. - * @return A collection of the readers. - */ - public Collection values() { - return readers.values(); - } - - /** - * Gets all the actual readers out of this data structure. - * @return A collection of the readers. - */ - public Collection headers() { - ArrayList headers = new ArrayList(readers.size()); - for (SAMFileReader reader : values()) - headers.add(reader.getFileHeader()); - return headers; - } } class ReaderInitializer implements Callable { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index d1148cbd5..ff3867120 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -38,9 +38,9 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.clipreads.ClippingOp; -import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; -import org.broadinstitute.sting.utils.clipreads.ReadClipper; +import org.broadinstitute.sting.utils.clipping.ClippingOp; +import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; +import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index 6cc8923e8..ecdde1e4f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT private double calculateTDT( final VariantContext vc, final Set triosToTest ) { - final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM); - final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM); + final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET); + final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET); final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET); final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET); - final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET); - final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF); + final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); return (numer * numer) / denom; } - private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { - final double likelihoodVector[] = new double[triosToTest.size() * 2]; + private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) { + final double likelihoodVector[] = new double[triosToTest.size()]; int iii = 0; for( final Sample child : triosToTest ) { final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); - likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; - likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; + likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx]; } return MathUtils.sumLog10(likelihoodVector); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 94902e828..69560c7cb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -170,6 +170,9 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio") public double minGenotypeQualityP = 0.0; + @Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp tracks that exactly match both reference and alternate alleles will be counted as concordant", required=false) + private boolean requireStrictAlleleMatch = false; + private VariantAnnotatorEngine engine; private Collection indelBufferContext; @@ -194,11 +197,6 @@ public class VariantAnnotator extends RodWalker implements Ann System.exit(0); } - @Override - public SampleDB getSampleDB() { - return super.getSampleDB(); - } - /** * Prepare the output file and the list of available features. */ @@ -216,6 +214,7 @@ public class VariantAnnotator extends RodWalker implements Ann else engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit()); engine.initializeExpressions(expressionsToUse); + engine.setRequireStrictAlleleMatch(requireStrictAlleleMatch); // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index d4442dc5d..98d2fe17b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -47,9 +47,11 @@ public class VariantAnnotatorEngine { private List requestedGenotypeAnnotations; private List requestedExpressions = new ArrayList(); - private HashMap, String> dbAnnotations = new HashMap, String>(); - private AnnotatorCompatibleWalker walker; - private GenomeAnalysisEngine toolkit; + private final HashMap, String> dbAnnotations = new HashMap, String>(); + private final AnnotatorCompatibleWalker walker; + private final GenomeAnalysisEngine toolkit; + + private boolean requireStrictAlleleMatch = false; protected static class VAExpression { @@ -163,6 +165,10 @@ public class VariantAnnotatorEngine { return descriptions; } + public void setRequireStrictAlleleMatch( final boolean requireStrictAlleleMatch ) { + this.requireStrictAlleleMatch = requireStrictAlleleMatch; + } + public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); @@ -197,7 +203,7 @@ public class VariantAnnotatorEngine { } else { boolean overlapsComp = false; for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) { - if ( !comp.isFiltered() ) { + if ( !comp.isFiltered() && ( !requireStrictAlleleMatch || comp.getAlleles().equals(vc.getAlleles()) ) ) { overlapsComp = true; break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java index 66106f658..ed80dce0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java @@ -34,9 +34,20 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; */ public class AlleleFrequencyCalculationResult { + // IMPORTANT NOTE: + // These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N). + // For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that + // the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may + // be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0). + // In the bi-allelic case (where there are no other alternate alleles over which to marginalize), + // the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED. final double[][] log10AlleleFrequencyLikelihoods; final double[][] log10AlleleFrequencyPosteriors; + // These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles) + double log10LikelihoodOfAFzero = 0.0; + double log10PosteriorOfAFzero = 0.0; + AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) { log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1]; log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index bc27916dd..aa743f86f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -407,14 +407,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { nonRefAlleles++; } - // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs - for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { - int AC = set.ACcounts.getCounts()[i]; - result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK); + // for k=0, we don't want to put that value into the likelihoods/posteriors matrix, but instead want to set the value in the results object + if ( nonRefAlleles == 0 ) { + result.log10LikelihoodOfAFzero = log10LofK; + result.log10PosteriorOfAFzero = log10LofK + log10AlleleFrequencyPriors[0][0]; + } else { + // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs + for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { + int AC = set.ACcounts.getCounts()[i]; + result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK); - // for k=0 we still want to use theta - final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; - result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); + final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; + result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index dc85e95e4..c58966999 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -75,8 +75,9 @@ public class UnifiedGenotyperEngine { // the model used for calculating p(non-ref) private ThreadLocal afcm = new ThreadLocal(); - // the allele frequency likelihoods (allocated once as an optimization) + // the allele frequency likelihoods and posteriors (allocated once as an optimization) private ThreadLocal alleleFrequencyCalculationResult = new ThreadLocal(); + private ThreadLocal posteriorsArray = new ThreadLocal(); // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything private final double[][] log10AlleleFrequencyPriorsSNPs; @@ -289,7 +290,9 @@ public class UnifiedGenotyperEngine { if ( afcm.get() == null ) { afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N)); + posteriorsArray.set(new double[N + 2]); } + AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get(); // don't try to genotype too many alternate alleles if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) { @@ -307,24 +310,20 @@ public class UnifiedGenotyperEngine { } // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); // is the most likely frequency conformation AC=0 for all alternate alleles? boolean bestGuessIsRef = true; - // which alternate allele has the highest MLE AC? - int indexOfHighestAlt = -1; - int alleleCountOfHighestAlt = -1; - // determine which alternate alleles have AF>0 boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { - int indexOfBestAC = MathUtils.maxElementIndex(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]); + int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[i]); // if the most likely AC is not 0, then this is a good alternate allele to use - if ( indexOfBestAC != 0 ) { + if ( indexOfBestAC != 0 && (AFresult.log10AlleleFrequencyPosteriors[i][0] != AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED || AFresult.log10AlleleFrequencyPosteriors[i][indexOfBestAC] > AFresult.log10PosteriorOfAFzero) ) { altAllelesToUse[i] = true; bestGuessIsRef = false; } @@ -332,35 +331,28 @@ public class UnifiedGenotyperEngine { else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { altAllelesToUse[i] = true; } - - // keep track of the "best" alternate allele to use - if ( indexOfBestAC > alleleCountOfHighestAlt) { - alleleCountOfHighestAlt = indexOfBestAC; - indexOfHighestAlt = i; - } } - // calculate p(f>0) - // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[indexOfHighestAlt]); - double sum = 0.0; - for (int i = 1; i <= N; i++) - sum += normalizedPosteriors[i]; - double PofF = Math.min(sum, 1.0); // deal with precision errors + // calculate p(f>0): + // because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them + final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get()); + final double PofF = 1.0 - normalizedPosteriors[0]; double phredScaledConfidence; if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; + phredScaledConfidence = -10.0 * AFresult.log10PosteriorOfAFzero; } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { - sum = 0.0; + double sum = AFresult.log10AlleleFrequencyPosteriors[0][0]; + if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + sum = 0.0; for (int i = 1; i <= N; i++) { - if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + if ( AFresult.log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) break; - sum += alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i]; + sum += AFresult.log10AlleleFrequencyPosteriors[0][i]; } phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); } @@ -374,7 +366,7 @@ public class UnifiedGenotyperEngine { } // strip out any alleles that aren't going to be used in the VariantContext - List myAlleles; + final List myAlleles; if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { myAlleles = new ArrayList(vc.getAlleles().size()); myAlleles.add(vc.getReference()); @@ -388,8 +380,8 @@ public class UnifiedGenotyperEngine { } // start constructing the resulting VC - GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); - VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); + final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); builder.log10PError(phredScaledConfidence/-10.0); if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); @@ -400,14 +392,14 @@ public class UnifiedGenotyperEngine { } // create the genotypes - GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles); + final GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles); // print out stats if we have a writer if ( verboseWriter != null && !limitedContext ) printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model); // *** note that calculating strand bias involves overwriting data structures, so we do that last - HashMap attributes = new HashMap(); + final HashMap attributes = new HashMap(); // if the site was downsampled, record that fact if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) @@ -418,31 +410,31 @@ public class UnifiedGenotyperEngine { // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); - //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double overallLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); + //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; + double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); - //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double forwardLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; - double forwardLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); + //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); + double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero; + double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods); - clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors); - afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get()); - //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double reverseLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0]; - double reverseLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1); + clearAFarray(AFresult.log10AlleleFrequencyLikelihoods); + clearAFarray(AFresult.log10AlleleFrequencyPosteriors); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult); + //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); + double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero; + double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; @@ -455,7 +447,8 @@ public class UnifiedGenotyperEngine { strandScore *= 10.0; //logger.debug(String.format("SLOD=%f", strandScore)); - attributes.put("SB", strandScore); + if ( !Double.isNaN(strandScore) ) + attributes.put("SB", strandScore); } // finish constructing the resulting VC @@ -478,6 +471,12 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } + private double[] generateNormalizedPosteriors(AlleleFrequencyCalculationResult AFresult, double[] normalizedPosteriors) { + normalizedPosteriors[0] = AFresult.log10PosteriorOfAFzero; + System.arraycopy(AFresult.log10AlleleFrequencyPosteriors[0], 0, normalizedPosteriors, 1, normalizedPosteriors.length-1); + return MathUtils.normalizeFromLog10(normalizedPosteriors); + } + private Map getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) { Map stratifiedContexts = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 339b8e0e6..7cc5b1625 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -175,7 +175,7 @@ public class VariantRecalibrator extends RodWalker implements TreeR @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; - @Hidden - @Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false) - private File FAMILY_STRUCTURE_FILE = null; - - /** - * String formatted as dad+mom=child where these parameters determine which sample names are examined. - */ - @Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) - private String FAMILY_STRUCTURE = ""; - /** * This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure. */ @@ -286,13 +278,21 @@ public class SelectVariants extends RodWalker implements TreeR private double fractionGenotypes = 0; /** - * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. + * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. * When specified one or more times, a particular type of variant is selected. * - */ + */ @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false) private List TYPES_TO_INCLUDE = new ArrayList(); + /** + * If provided, we will only include variants whose ID field is present in this list of ids. The matching + * is exact string matching. The file format is just one ID per line + * + */ + @Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false) + private File rsIDFile = null; + @Hidden @Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false) @@ -313,9 +313,9 @@ public class SelectVariants extends RodWalker implements TreeR } public enum NumberAlleleRestriction { - ALL, - BIALLELIC, - MULTIALLELIC + ALL, + BIALLELIC, + MULTIALLELIC } private ArrayList selectedTypes = new ArrayList(); @@ -339,17 +339,13 @@ public class SelectVariants extends RodWalker implements TreeR private int positionToAdd = 0; private RandomVariantStructure [] variantArray; - - /* Variables used for random selection with AF boosting */ - private ArrayList afBreakpoints = null; - private ArrayList afBoosts = null; - double bkDelta = 0.0; - private PrintStream outMVFileStream = null; - //Random number generator for the genotypes to remove + //Random number generator for the genotypes to remove private Random randomGenotypes = new Random(); + private Set IDsToKeep = null; + /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ @@ -437,7 +433,18 @@ public class SelectVariants extends RodWalker implements TreeR if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track"); - + /** load in the IDs file to a hashset for matching */ + if ( rsIDFile != null ) { + IDsToKeep = new HashSet(); + try { + for ( final String line : new XReadLines(rsIDFile).readLines() ) { + IDsToKeep.add(line.trim()); + } + logger.info("Selecting only variants with one of " + IDsToKeep.size() + " IDs from " + rsIDFile); + } catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(rsIDFile, e); + } + } } /** @@ -460,20 +467,23 @@ public class SelectVariants extends RodWalker implements TreeR } for (VariantContext vc : vcs) { + if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) ) + continue; + if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1) - break; + break; if (outMVFile != null){ for( String familyId : mv.getViolationFamilies()){ for(Sample sample : this.getSampleDB().getFamily(familyId)){ if(sample.getParents().size() > 0){ - outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + - "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), - vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), - sample.getMaternalID(), sample.getPaternalID(), sample.getID(), - vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(), - vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(), - vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() ); + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + + "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), + vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), + sample.getMaternalID(), sample.getPaternalID(), sample.getID(), + vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(), + vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(), + vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() ); } } @@ -483,12 +493,12 @@ public class SelectVariants extends RodWalker implements TreeR if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) - return 0; + continue; } if (CONCORDANCE_ONLY) { Collection compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) - return 0; + continue; } if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic()) @@ -502,21 +512,22 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, samples); if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { + boolean failedJexlMatch = false; for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) { if ( !VariantContextUtils.match(sub, jexl) ) { - return 0; + failedJexlMatch = true; + break; } } - if (SELECT_RANDOM_NUMBER) { - randomlyAddVariant(++variantNumber, sub); + if ( !failedJexlMatch ) { + if (SELECT_RANDOM_NUMBER) { + randomlyAddVariant(++variantNumber, sub); + } + else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { + vcfWriter.add(sub); + } } - else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { - vcfWriter.add(sub); - } - - } - } return 1; @@ -647,7 +658,7 @@ public class SelectVariants extends RodWalker implements TreeR // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) if ( vc.getAlleles().size() != sub.getAlleles().size() ) - newGC = VariantContextUtils.stripPLs(sub.getGenotypes()); + newGC = VariantContextUtils.stripPLs(sub.getGenotypes()); //Remove a fraction of the genotypes if needed if(fractionGenotypes>0){ @@ -655,10 +666,10 @@ public class SelectVariants extends RodWalker implements TreeR for ( Genotype genotype : newGC ) { //Set genotype to no call if it falls in the fraction. if(fractionGenotypes>0 && randomGenotypes.nextDouble() alleles = new ArrayList(2); - alleles.add(Allele.create((byte)'.')); - alleles.add(Allele.create((byte)'.')); - genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap(),false)); + ArrayList alleles = new ArrayList(2); + alleles.add(Allele.create((byte)'.')); + alleles.add(Allele.create((byte)'.')); + genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap(),false)); } else{ genotypes.add(genotype); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index f1f61d071..4b3aa4864 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -127,12 +127,12 @@ public class VariantsToTable extends RodWalker { * multi-allelic INFO field values can be lists of values. */ @Advanced - @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) - public boolean keepMultiAllelic = false; + @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) + public boolean keepMultiAllelic = false; @Hidden @Argument(fullName="logACSum", shortName="logACSum", doc="Log sum of AC instead of max value in case of multiallelic variants", required=false) - public boolean logACSum = false; + public boolean logACSum = false; /** * By default, this tool throws a UserException when it encounters a field without a value in some record. This diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index d8176ff4e..d753da1c8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -109,7 +109,7 @@ public class RScriptExecutor { List tempFiles = new ArrayList(); try { - File tempLibDir = IOUtils.tempDir("R.", ".lib"); + File tempLibDir = IOUtils.tempDir("Rlib.", ""); tempFiles.add(tempLibDir); StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibDir).append("';"); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java index 06985041e..f440eda5d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingOp.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.Cigar; @@ -460,17 +460,20 @@ public class ClippingOp { int newShift = 0; int oldShift = 0; + boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift for (CigarElement cigarElement : newCigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) newShift += cigarElement.getLength(); - else + else { + readHasStarted = true; break; + } } for (CigarElement cigarElement : oldCigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP ) oldShift += Math.min(cigarElement.getLength(), newShift - oldShift); - else + else if (readHasStarted) break; } return newShift - oldShift; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java index d574ba2f0..c9d8730d1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ClippingRepresentation.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; /** * How should we represent a clipped bases in a read? diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java similarity index 93% rename from public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java rename to public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index 4bb309a48..6e3f37980 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; @@ -63,12 +63,18 @@ public class ReadClipper { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); if ( start > stop ) throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); + if ( start > 0 && stop < read.getReadLength() - 1) + throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString())); + this.addOp(new ClippingOp(start, stop)); GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); this.ops = null; @@ -76,6 +82,9 @@ public class ReadClipper { } public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { + if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) + return new GATKSAMRecord(read.getHeader()); + this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); } @@ -195,16 +204,12 @@ public class ReadClipper { for(CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && - cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION) + cigarElement.getOperator() != CigarOperator.INSERTION) break; - else if (cigarElement.getOperator() == CigarOperator.INSERTION) { + else if (cigarElement.getOperator() == CigarOperator.INSERTION) this.addOp(new ClippingOp(0, cigarElement.getLength() - 1)); - } - else if (cigarElement.getOperator() == CigarOperator.DELETION) { - throw new ReviewedStingException("No read should start with a deletion. Aligner bug?"); - } } return clipRead(ClippingRepresentation.HARDCLIP_BASES); } diff --git a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java index 6f0573b02..b3fdb93d3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java @@ -79,7 +79,7 @@ public class IOUtils { tempDirParent = FileUtils.getTempDirectory(); if (!tempDirParent.exists() && !tempDirParent.mkdirs()) throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent); - File temp = File.createTempFile(prefix + "-", suffix, tempDirParent); + File temp = File.createTempFile(prefix, suffix, tempDirParent); if (!temp.delete()) throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath()); if (!temp.mkdir()) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 91a018c4e..c9a4965c1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -619,8 +619,9 @@ public class VariantContextUtils { if (vc.alleles.size() == 1) continue; if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) { - logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s", - genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); + if ( ! genotypes.isEmpty() ) + logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s", + genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); genotypes = stripPLs(genotypes); // this will remove stale AC,AF attributed from vc calculateChromosomeCounts(vc, attributes, true); diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 54d34fcfa..8e218f950 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -1,18 +1,12 @@ package org.broadinstitute.sting; -import org.apache.commons.io.FileUtils; import org.apache.log4j.*; import org.apache.log4j.spi.LoggingEvent; import org.broadinstitute.sting.commandline.CommandLineUtils; -import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.testng.Assert; +import org.broadinstitute.sting.utils.io.IOUtils; -import javax.swing.*; import java.io.*; -import java.math.BigInteger; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; import java.util.*; /** @@ -78,8 +72,8 @@ public abstract class BaseTest { public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list"; public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list"; - public static final String networkTempDir = "/broad/shptmp/" + System.getProperty("user.name") + "/"; - public static final File networkTempDirFile = new File(networkTempDir); + public static final String networkTempDir; + public static final File networkTempDirFile; public static final File testDirFile = new File("public/testdata/"); public static final String testDir = testDirFile.getAbsolutePath() + "/"; @@ -99,6 +93,10 @@ public abstract class BaseTest { // Set the Root logger to only output warnings. logger.setLevel(Level.WARN); + networkTempDirFile = IOUtils.tempDir("temp.", ".dir", new File("/broad/shptmp/" + System.getProperty("user.name"))); + networkTempDirFile.deleteOnExit(); + networkTempDir = networkTempDirFile.getAbsolutePath() + "/"; + // find our file sources // if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) { // logger.fatal("We can't locate the reference directories. Aborting!"); @@ -233,18 +231,12 @@ public abstract class BaseTest { /** * Creates a temp file that will be deleted on exit after tests are complete. - * @param name Prefix of the file. - * @param extension Extension to concat to the end of the file. - * @return A file in the network temporary directory starting with name, ending with extension, which will be deleted after the program exits. + * @param name Name of the file. + * @return A file in the network temporary directory with name, which will be deleted after the program exits. */ - public static File createNetworkTempFile(String name, String extension) { - try { - FileUtils.forceMkdir(networkTempDirFile); - File file = File.createTempFile(name, extension, networkTempDirFile); - file.deleteOnExit(); - return file; - } catch (IOException ex) { - throw new ReviewedStingException("Cannot create temp file: " + ex.getMessage(), ex); - } + public static File createNetworkTempFile(String name) { + File file = new File(networkTempDirFile, name); + file.deleteOnExit(); + return file; } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index ffb9aedcc..245fda3c7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testTDTAnnotation() { - final String MD5 = "9fe37b61aab695ad47ce3c587148e91f"; + final String MD5 = "204e67536a17af7eaa6bf0a910818997"; WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index dec6ecb79..6f259f699 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -97,7 +97,12 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]); - Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); + + if ( result.log10AlleleFrequencyPosteriors[0][0] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) { + Assert.assertTrue(calculatedAlleleCount == expectedAlleleCount || result.log10AlleleFrequencyPosteriors[0][calculatedAlleleCount] < result.log10PosteriorOfAFzero); + } else { + Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); + } } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 6041f80b8..e049af064 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -115,7 +115,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCallingParameters() { HashMap e = new HashMap(); e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" ); - e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" ); + e.put( "--computeSLOD", "6172d2f3d370132f4c57a26aa94c256e" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testOutputParameter() { HashMap e = new HashMap(); e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "274bd9d1b9c7857690fa5f0228ffc6d7" ); - e.put( "--output_mode EMIT_ALL_SITES", "594c6d3c48bbc73289de7727d768644d" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "d971d392956aea69c3707da64d24485b" ); + e.put( "--output_mode EMIT_ALL_SITES", "21993e71ca1a06a0d1f11d58e3cc26c3" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead")); + Arrays.asList("8bd5028cf294850b8a95b3c0af23d728")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51")); + Arrays.asList("814dcd66950635a870602d90a1618cce")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 72a07bd0e..042de2a27 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testUsingDbsnpName--" + testFile, spec); } + @Test + public void testMultipleRecordsAtOnePosition() { + String testFile = validationDataLocation + "selectVariants.onePosition.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("20b52c96f5c48258494d072752b53693") + ); + + executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec); + } + @Test public void testParallelization() { String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; diff --git a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java index 48f4c3777..f68a96d26 100644 --- a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSessionIntegrationTest.java @@ -62,7 +62,7 @@ public class JnaSessionIntegrationTest extends BaseTest { return; } - File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out"); + File outFile = createNetworkTempFile("JnaSessionIntegrationTest.out"); Session session = factory.getSession(); session.init(null); try { diff --git a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java index d98281ad3..4c7d4ce06 100644 --- a/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/drmaa/v1_0/LibDrmaaIntegrationTest.java @@ -86,7 +86,7 @@ public class LibDrmaaIntegrationTest extends BaseTest { @Test(dependsOnMethods = { "testDrmaa" }) public void testSubmitEcho() throws Exception { - if (implementation.indexOf("LSF") >= 0) { + if (implementation.contains("LSF")) { System.err.println(" *********************************************************"); System.err.println(" ***********************************************************"); System.err.println(" **** ****"); @@ -101,7 +101,7 @@ public class LibDrmaaIntegrationTest extends BaseTest { Memory error = new Memory(LibDrmaa.DRMAA_ERROR_STRING_BUFFER); int errnum; - File outFile = createNetworkTempFile("LibDrmaaIntegrationTest-", ".out"); + File outFile = createNetworkTempFile("LibDrmaaIntegrationTest.out"); errnum = LibDrmaa.drmaa_init(null, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN); diff --git a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java index b4fb5cfa3..21339eb46 100644 --- a/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/jna/lsf/v7_0_6/LibBatIntegrationTest.java @@ -93,7 +93,7 @@ public class LibBatIntegrationTest extends BaseTest { @Test public void testSubmitEcho() throws Exception { String queue = "hour"; - File outFile = createNetworkTempFile("LibBatIntegrationTest-", ".out"); + File outFile = createNetworkTempFile("LibBatIntegrationTest.out"); submit req = new submit(); diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java similarity index 93% rename from public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java rename to public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java index c6dc3a833..18108e0a1 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; @@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; -import java.util.*; +import java.util.LinkedList; +import java.util.List; +import java.util.Stack; /** * Created by IntelliJ IDEA. @@ -17,7 +19,7 @@ import java.util.*; * Time: 6:45 AM * To change this template use File | Settings | File Templates. */ -public class ClipReadsTestUtils { +public class ReadClipperTestUtils { //Should contain all the utils needed for tests to mass produce //reads, cigars, and other needed classes @@ -176,4 +178,16 @@ public class ClipReadsTestUtils { Assert.assertEquals(actual.isEmpty(), expected.isEmpty()); } + public static Cigar invertCigar (Cigar cigar) { + Stack cigarStack = new Stack(); + for (CigarElement cigarElement : cigar.getCigarElements()) + cigarStack.push(cigarElement); + + Cigar invertedCigar = new Cigar(); + while (!cigarStack.isEmpty()) + invertedCigar.add(cigarStack.pop()); + + return invertedCigar; + } + } diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java similarity index 52% rename from public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java index fc1459ee0..3b4b0abce 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperUnitTest.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.clipreads; +package org.broadinstitute.sting.utils.clipping; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; @@ -32,75 +32,119 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; -import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; import java.util.List; /** - * Created by IntelliJ IDEA. * User: roger * Date: 9/28/11 - * Time: 9:54 PM - * To change this template use File | Settings | File Templates. */ public class ReadClipperUnitTest extends BaseTest { - // TODO: exception testing, make cases that should fail will fail - - // TODO: add indels to all test cases - List cigarList; - int maximumCigarSize = 10; + int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2 @BeforeClass public void init() { - cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize); + cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize); } @Test(enabled = true) public void testHardClipBothEndsByReferenceCoordinates() { - logger.warn("Executing testHardClipBothEndsByReferenceCoordinates"); - + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + int readLength = alnStart - alnEnd; + for (int i=0; i= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); + Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); + } + } } @Test(enabled = true) public void testHardClipByReadCoordinates() { - logger.warn("Executing testHardClipByReadCoordinates"); + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + int readLength = read.getReadLength(); + for (int i=0; i %s", i, read.getCigarString(), clipLeft.getCigarString())); + GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1); + Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString())); + } + } } @Test(enabled = true) public void testHardClipByReferenceCoordinates() { - logger.warn("Executing testHardClipByReferenceCoordinates"); + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + for (int i=alnStart; i<=alnEnd; i++) { + if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); + if (!clipLeft.isEmpty()) + Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); + } + if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. + Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); + } + } + } } @Test(enabled = true) public void testHardClipByReferenceCoordinatesLeftTail() { - logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail"); - + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side + for (int i=alnStart; i<=alnEnd; i++) { + GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i); + if (!clipLeft.isEmpty()) + Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); + } + } + } } @Test(enabled = true) public void testHardClipByReferenceCoordinatesRightTail() { - init(); - logger.warn("Executing testHardClipByReferenceCoordinatesRightTail"); - + for (Cigar cigar : cigarList) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + int alnStart = read.getAlignmentStart(); + int alnEnd = read.getAlignmentEnd(); + if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side + for (int i=alnStart; i<=alnEnd; i++) { + GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd); + if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. + Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); + } + } + } } @Test(enabled = true) public void testHardClipLowQualEnds() { - logger.warn("Executing testHardClipLowQualEnds"); - final byte LOW_QUAL = 2; final byte HIGH_QUAL = 30; // create a read for every cigar permutation for (Cigar cigar : cigarList) { - GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar); + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); int readLength = read.getReadLength(); byte [] quals = new byte[readLength]; @@ -116,7 +160,7 @@ public class ReadClipperUnitTest extends BaseTest { // Tests // Make sure the low qualities are gone - testNoLowQualBases(clipLeft, LOW_QUAL); + assertNoLowQualBases(clipLeft, LOW_QUAL); // Can't run this test with the current contract of no hanging insertions //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString())); @@ -131,7 +175,7 @@ public class ReadClipperUnitTest extends BaseTest { // Tests // Make sure the low qualities are gone - testNoLowQualBases(clipRight, LOW_QUAL); + assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString())); @@ -149,7 +193,7 @@ public class ReadClipperUnitTest extends BaseTest { // Tests // Make sure the low qualities are gone - testNoLowQualBases(clipBoth, LOW_QUAL); + assertNoLowQualBases(clipBoth, LOW_QUAL); // Can't run this test with the current contract of no hanging insertions //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString())); @@ -158,11 +202,7 @@ public class ReadClipperUnitTest extends BaseTest { // logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString())); } - - - - - // ONE OFF Testing clipping that ends inside an insertion + // ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug ) final byte[] BASES = {'A','C','G','T','A','C','G','T'}; final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2}; final String CIGAR = "1S1M5I1S"; @@ -176,15 +216,7 @@ public class ReadClipperUnitTest extends BaseTest { GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR); ReadClipper lowQualClipper = new ReadClipper(read); - ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected); - } - - private void testNoLowQualBases(GATKSAMRecord read, byte low_qual) { - if (!read.isEmpty()) { - byte [] quals = read.getBaseQualities(); - for (int i=0; i %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString())); } } + + @Test(enabled = false) + public void testHardClipLeadingInsertions() { + for (Cigar cigar : cigarList) { + if (startsWithInsertion(cigar)) { + GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar); + GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions(); + + int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar()); + if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar())) + expectedLength -= leadingInsertionLength(ReadClipperTestUtils.invertCigar(read.getCigar())); + + if (! clippedRead.isEmpty()) { + Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there + Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone + } + else + Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped + } + } + } + + + + private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) { + if (!read.isEmpty()) { + byte [] quals = read.getBaseQualities(); + for (int i=0; i 0; + } + + private int leadingInsertionLength(Cigar cigar) { + for (CigarElement cigarElement : cigar.getCigarElements()) { + if (cigarElement.getOperator() == CigarOperator.INSERTION) + return cigarElement.getLength(); + if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) + break; + } + return 0; + } + + private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) { + for (CigarElement cigarElement : cigar.getCigarElements()) + if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP) + return true; + return false; + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java deleted file mode 100644 index dd674ff1c..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.broadinstitute.sting.utils.clipreads; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.Test; - - -/** - * Created by IntelliJ IDEA. - * User: roger - * Date: 11/27/11 - * Time: 5:17 AM - * To change this template use File | Settings | File Templates. - */ -public class ClippingOpUnitTest extends BaseTest { - - ClippingOp clippingOp; - GATKSAMRecord read; - - @BeforeTest - public void init() { - read = ClipReadsTestUtils.makeRead(); - } - - @Test - private void testHardClip() { -// List testList = new LinkedList(); -// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start -// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end -// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start -// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end -// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start -// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end -// -// for (TestParameter p : testList) { -// init(); -// clippingOp = new ClippingOp(p.inputStart, p.inputStop); -// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar); -// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read), -// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(), -// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(), -// p.cigar); -// } - - } - - @Test - private void testSoftClip() { -// List testList = new LinkedList(); -// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start -// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end -// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start -// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end -// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start -// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end -// -// for (TestParameter p : testList) { -// init(); -// clippingOp = new ClippingOp(p.inputStart, p.inputStop); -// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar); -// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read), -// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar); -// } - - } -} diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala index 4ca3cbb89..149376018 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala @@ -19,7 +19,7 @@ class ExampleCountLoci extends QScript { @Output var out: File = _ - def script = { + def script() { val countLoci = new CountLoci countLoci.reference_sequence = referenceFile countLoci.input_file = bamFiles diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala index 9fdd1ba4c..7f9d3f87a 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala @@ -24,7 +24,7 @@ class ExampleCountReads extends QScript { /** * In script, you create and then add() functions to the pipeline. */ - def script = { + def script() { // Run CountReads for all bams jointly. @@ -41,6 +41,9 @@ class ExampleCountReads extends QScript { // matches the full form of the argument, but will actually be a scala List[] jointCountReads.input_file = bamFiles + // Set the memory limit. Also acts as a memory request on LSF and GridEngine. + jointCountReads.memoryLimit = 1 + // Add the newly created function to the pipeline. add(jointCountReads) @@ -51,6 +54,7 @@ class ExampleCountReads extends QScript { singleCountReads.reference_sequence = referenceFile // ':+' is the scala List append operator singleCountReads.input_file :+= bamFile + singleCountReads.memoryLimit = 1 add(singleCountReads) } } diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala index d3796d350..d30668c19 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCustomWalker.scala @@ -24,7 +24,7 @@ class ExampleCustomWalker extends QScript { /** * In script, you create and then add() functions to the pipeline. */ - def script = { + def script() { val customWalker = new CommandLineGATK { // Set the name of your walker, for example this will be passed as -T MyCustomWalker this.analysis_type = "MyCustomWalker" diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala index 2f4aea755..8cb86db0b 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala @@ -33,7 +33,6 @@ class ExampleUnifiedGenotyper extends QScript { @Argument(doc="An optional list of filter expressions.", shortName="filterExpression", required=false) var filterExpressions: List[String] = Nil - // This trait allows us set the variables below in one place, // and then reuse this trait on each CommandLineGATK function below. trait UnifiedGenotyperArguments extends CommandLineGATK { diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala index 913bd243c..32913deb4 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala @@ -89,7 +89,7 @@ class QCommandLine extends CommandLineProgram with Logging { private var shuttingDown = false private lazy val pluginManager = { - qScriptClasses = IOUtils.tempDir("Q-Classes", "", settings.qSettings.tempDirectory) + qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory) qScriptManager.loadScripts(scripts, qScriptClasses) new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL)) } diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala index bb14bb6e6..73d1c028a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QJobReport.scala @@ -81,6 +81,10 @@ trait QJobReport extends Logging { this.reportFeatures = features.mapValues(_.toString) } + def addJobReportBinding(key: String, value: Any) { + this.reportFeatures += (key -> value.toString) + } + // copy the QJobReport information -- todo : what's the best way to do this? override def copySettingsTo(function: QFunction) { self.copySettingsTo(function) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala index 5901cab46..f657e4be1 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala @@ -30,7 +30,7 @@ import org.broadinstitute.sting.BaseTest class ExampleCountLociPipelineTest { @Test - def testCountLoci { + def testCountLoci() { val testOut = "count.out" val spec = new PipelineTestSpec spec.name = "countloci" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala new file mode 100644 index 000000000..8b286f090 --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.pipeline.examples + +import org.testng.annotations.Test +import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.BaseTest + +class ExampleCountReadsPipelineTest { + @Test + def testCountReads() { + val spec = new PipelineTestSpec + spec.name = "countreads" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -I " + BaseTest.testDir + "exampleBAM.bam").mkString + PipelineTest.executeTest(spec) + } +} diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala new file mode 100644 index 000000000..d50673a1a --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.pipeline.examples + +import org.testng.annotations.Test +import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.BaseTest + +class ExampleUnifiedGenotyperPipelineTest { + @Test + def testUnifiedGenotyper() { + val spec = new PipelineTestSpec + spec.name = "unifiedgenotyper" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -I " + BaseTest.testDir + "exampleBAM.bam", + " -filter QD", + " -filterExpression 'QD < 2.0'").mkString + PipelineTest.executeTest(spec) + } +}