Merge branch 'master' of ssh://tin.broadinstitute.org/humgen/gsa-scr1/chartl/dev/unstable

Conflicts:
	private/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IntronLossGenotyperV2.java
This commit is contained in:
Christopher Hartl 2011-12-19 10:59:18 -05:00
commit 418d22b67e
38 changed files with 607 additions and 383 deletions

View File

@ -12,7 +12,7 @@ if ( onCMDLine ) {
inputFileName = args[1]
outputPDF = args[2]
} else {
inputFileName = "Q-8271@gsa2.jobreport.txt"
inputFileName = "Q-26618@gsa4.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA
@ -129,9 +129,11 @@ plotGroup <- function(groupTable) {
# as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
if ( dim(sub)[1] > 1 ) {
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
try({ # need a try here because we will fail to reduce when there's just a single iteration
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}, silent=T)
}
}
@ -193,6 +195,7 @@ plotJobsGantt(gatkReportData, F, F)
plotProgressByTime(gatkReportData)
plotTimeByHost(gatkReportData)
for ( group in gatkReportData ) {
print(group)
plotGroup(group)
}

View File

@ -64,6 +64,9 @@ import java.util.concurrent.*;
public class SAMDataSource {
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
/** If true, we will load SAMReaders in parallel */
final private static boolean USE_PARALLEL_LOADING = false;
/** Backing support for reads. */
protected final ReadProperties readProperties;
@ -97,11 +100,6 @@ public class SAMDataSource {
*/
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
/**
* Cached representation of the merged header used to generate a merging iterator.
*/
private final SamFileHeaderMerger headerMerger;
/**
* The merged header.
*/
@ -297,9 +295,8 @@ public class SAMDataSource {
initializeReaderPositions(readers);
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
mergedHeader = headerMerger.getMergedHeader();
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
mergedHeader = readers.getMergedHeader();
hasReadGroupCollisions = readers.hasReadGroupCollisions();
readProperties = new ReadProperties(
samFiles,
@ -324,9 +321,9 @@ public class SAMDataSource {
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
for(SAMReadGroupRecord readGroup: readGroups) {
if(headerMerger.hasReadGroupCollisions()) {
mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId());
if(hasReadGroupCollisions) {
mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId()));
mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId());
} else {
mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
@ -559,7 +556,7 @@ public class SAMDataSource {
*/
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
// Set up merging to dynamically merge together multiple BAMs.
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
MergingSamRecordIterator mergingIterator = readers.createMergingIterator();
for(SAMReaderID id: getReaderIDs()) {
CloseableIterator<SAMRecord> iterator = null;
@ -704,6 +701,11 @@ public class SAMDataSource {
* A collection of readers derived from a reads metadata structure.
*/
private class SAMReaders implements Iterable<SAMFileReader> {
/**
* Cached representation of the merged header used to generate a merging iterator.
*/
private final SamFileHeaderMerger headerMerger;
/**
* Internal storage for a map of id -> reader.
*/
@ -720,68 +722,133 @@ public class SAMDataSource {
* @param validationStringency validation stringency.
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
final int N_THREADS = 8;
int totalNumberOfFiles = readerIDs.size();
final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start();
ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
for (SAMReaderID readerID: readerIDs) {
logger.debug("Enqueuing for initialization: " + readerID.samFile);
final ReaderInitializer init = new ReaderInitializer(readerID);
inits.add(init);
futures.add(executor.submit(init));
}
final SimpleTimer timer = new SimpleTimer();
try {
final int MAX_WAIT = 30 * 1000;
final int MIN_WAIT = 1 * 1000;
timer.start();
while ( ! futures.isEmpty() ) {
final int prevSize = futures.size();
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
Thread.sleep(waitTimeInMS);
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
for ( final Future<ReaderInitializer> initFuture : futures ) {
if ( initFuture.isDone() ) {
final ReaderInitializer init = initFuture.get();
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
readers.put(init.readerID, init.reader);
} else {
pending.add(initFuture);
}
if ( totalNumberOfFiles > 0 ) logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial"));
if ( ! USE_PARALLEL_LOADING ) {
final int tickSize = 50;
int nExecutedTotal = 0;
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}
final int pendingSize = pending.size();
final int nExecutedInTick = prevSize - pendingSize;
final int nExecutedTotal = totalNumberOfFiles - pendingSize;
final double totalTimeInSeconds = timer.getElapsedTime();
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
final int nRemaining = pendingSize;
final double estTimeToComplete = pendingSize / nTasksPerSecond;
logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
nExecutedInTick, (int)(waitTimeInMS / 1000.0),
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
nRemaining, estTimeToComplete, estTimeToComplete / 60));
futures = pending;
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
readers.put(init.readerID,init.reader);
if ( ++nExecutedTotal % tickSize == 0) {
double tickInSec = (timer.currentTime() - lastTick) / 1000.0;
printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInSec);
lastTick = timer.currentTime();
}
}
} catch ( InterruptedException e ) {
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
} catch ( ExecutionException e ) {
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
} else {
final int N_THREADS = 8;
final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
for (final SAMReaderID readerID: readerIDs) {
logger.debug("Enqueuing for initialization: " + readerID.samFile);
final ReaderInitializer init = new ReaderInitializer(readerID);
inits.add(init);
futures.add(executor.submit(init));
}
try {
final int MAX_WAIT = 30 * 1000;
final int MIN_WAIT = 1 * 1000;
while ( ! futures.isEmpty() ) {
final int prevSize = futures.size();
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
Thread.sleep(waitTimeInMS);
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
for ( final Future<ReaderInitializer> initFuture : futures ) {
if ( initFuture.isDone() ) {
final ReaderInitializer init = initFuture.get();
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
readers.put(init.readerID, init.reader);
} else {
pending.add(initFuture);
}
}
final int nExecutedTotal = totalNumberOfFiles - pending.size();
final int nExecutedInTick = prevSize - pending.size();
printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, waitTimeInMS / 1000.0);
futures = pending;
}
} catch ( InterruptedException e ) {
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
} catch ( ExecutionException e ) {
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
}
executor.shutdown();
}
logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
executor.shutdown();
if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
Collection<SAMFileHeader> headers = new LinkedList<SAMFileHeader>();
for(SAMFileReader reader: readers.values())
headers.add(reader.getFileHeader());
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
}
final private void printReaderPerformance(final int nExecutedTotal,
final int nExecutedInTick,
final int totalNumberOfFiles,
final SimpleTimer timer,
final double tickDurationInSec) {
final int pendingSize = totalNumberOfFiles - nExecutedTotal;
final double totalTimeInSeconds = timer.getElapsedTime();
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
final int nRemaining = pendingSize;
final double estTimeToComplete = pendingSize / nTasksPerSecond;
logger.info(String.format("Init %d BAMs in last %.2f s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
nExecutedInTick, tickDurationInSec,
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
nRemaining, estTimeToComplete, estTimeToComplete / 60));
}
/**
* Return the header derived from the merging of these BAM files.
* @return the merged header.
*/
public SAMFileHeader getMergedHeader() {
return headerMerger.getMergedHeader();
}
/**
* Do multiple read groups collide in this dataset?
* @return True if multiple read groups collide; false otherwis.
*/
public boolean hasReadGroupCollisions() {
return headerMerger.hasReadGroupCollisions();
}
/**
* Get the newly mapped read group ID for the given read group.
* @param readerID Reader for which to discern the transformed ID.
* @param originalReadGroupID Original read group.
* @return Remapped read group.
*/
public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) {
SAMFileHeader header = readers.get(readerID).getFileHeader();
return headerMerger.getReadGroupId(header,originalReadGroupID);
}
public MergingSamRecordIterator createMergingIterator() {
return new MergingSamRecordIterator(headerMerger,readers.values(),true);
}
/**
@ -833,25 +900,6 @@ public class SAMDataSource {
public boolean isEmpty() {
return readers.isEmpty();
}
/**
* Gets all the actual readers out of this data structure.
* @return A collection of the readers.
*/
public Collection<SAMFileReader> values() {
return readers.values();
}
/**
* Gets all the actual readers out of this data structure.
* @return A collection of the readers.
*/
public Collection<SAMFileHeader> headers() {
ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
for (SAMFileReader reader : values())
headers.add(reader.getFileHeader());
return headers;
}
}
class ReaderInitializer implements Callable<ReaderInitializer> {

View File

@ -38,9 +38,9 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipreads.ClippingOp;
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
import org.broadinstitute.sting.utils.clipping.ClippingOp;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET);
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET);
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF);
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF);
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
return (numer * numer) / denom;
}
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
final double likelihoodVector[] = new double[triosToTest.size() * 2];
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) {
final double likelihoodVector[] = new double[triosToTest.size()];
int iii = 0;
for( final Sample child : triosToTest ) {
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx];
}
return MathUtils.sumLog10(likelihoodVector);

View File

@ -170,6 +170,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
public double minGenotypeQualityP = 0.0;
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp tracks that exactly match both reference and alternate alleles will be counted as concordant", required=false)
private boolean requireStrictAlleleMatch = false;
private VariantAnnotatorEngine engine;
private Collection<VariantContext> indelBufferContext;
@ -194,11 +197,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
System.exit(0);
}
@Override
public SampleDB getSampleDB() {
return super.getSampleDB();
}
/**
* Prepare the output file and the list of available features.
*/
@ -216,6 +214,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
else
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
engine.setRequireStrictAlleleMatch(requireStrictAlleleMatch);
// setup the header fields
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones

View File

@ -47,9 +47,11 @@ public class VariantAnnotatorEngine {
private List<GenotypeAnnotation> requestedGenotypeAnnotations;
private List<VAExpression> requestedExpressions = new ArrayList<VAExpression>();
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
private AnnotatorCompatibleWalker walker;
private GenomeAnalysisEngine toolkit;
private final HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
private final AnnotatorCompatibleWalker walker;
private final GenomeAnalysisEngine toolkit;
private boolean requireStrictAlleleMatch = false;
protected static class VAExpression {
@ -163,6 +165,10 @@ public class VariantAnnotatorEngine {
return descriptions;
}
public void setRequireStrictAlleleMatch( final boolean requireStrictAlleleMatch ) {
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
}
public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
@ -197,7 +203,7 @@ public class VariantAnnotatorEngine {
} else {
boolean overlapsComp = false;
for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) {
if ( !comp.isFiltered() ) {
if ( !comp.isFiltered() && ( !requireStrictAlleleMatch || comp.getAlleles().equals(vc.getAlleles()) ) ) {
overlapsComp = true;
break;
}

View File

@ -34,9 +34,20 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
*/
public class AlleleFrequencyCalculationResult {
// IMPORTANT NOTE:
// These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N).
// For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that
// the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may
// be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0).
// In the bi-allelic case (where there are no other alternate alleles over which to marginalize),
// the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED.
final double[][] log10AlleleFrequencyLikelihoods;
final double[][] log10AlleleFrequencyPosteriors;
// These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
double log10LikelihoodOfAFzero = 0.0;
double log10PosteriorOfAFzero = 0.0;
AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) {
log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1];
log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1];

View File

@ -407,14 +407,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
nonRefAlleles++;
}
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
int AC = set.ACcounts.getCounts()[i];
result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
// for k=0, we don't want to put that value into the likelihoods/posteriors matrix, but instead want to set the value in the results object
if ( nonRefAlleles == 0 ) {
result.log10LikelihoodOfAFzero = log10LofK;
result.log10PosteriorOfAFzero = log10LofK + log10AlleleFrequencyPriors[0][0];
} else {
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
int AC = set.ACcounts.getCounts()[i];
result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
// for k=0 we still want to use theta
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
}
}
}

View File

@ -75,8 +75,9 @@ public class UnifiedGenotyperEngine {
// the model used for calculating p(non-ref)
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
// the allele frequency likelihoods (allocated once as an optimization)
// the allele frequency likelihoods and posteriors (allocated once as an optimization)
private ThreadLocal<AlleleFrequencyCalculationResult> alleleFrequencyCalculationResult = new ThreadLocal<AlleleFrequencyCalculationResult>();
private ThreadLocal<double[]> posteriorsArray = new ThreadLocal<double[]>();
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
private final double[][] log10AlleleFrequencyPriorsSNPs;
@ -289,7 +290,9 @@ public class UnifiedGenotyperEngine {
if ( afcm.get() == null ) {
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N));
posteriorsArray.set(new double[N + 2]);
}
AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
// don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
@ -307,24 +310,20 @@ public class UnifiedGenotyperEngine {
}
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
// is the most likely frequency conformation AC=0 for all alternate alleles?
boolean bestGuessIsRef = true;
// which alternate allele has the highest MLE AC?
int indexOfHighestAlt = -1;
int alleleCountOfHighestAlt = -1;
// determine which alternate alleles have AF>0
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
int indexOfBestAC = MathUtils.maxElementIndex(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]);
int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[i]);
// if the most likely AC is not 0, then this is a good alternate allele to use
if ( indexOfBestAC != 0 ) {
if ( indexOfBestAC != 0 && (AFresult.log10AlleleFrequencyPosteriors[i][0] != AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED || AFresult.log10AlleleFrequencyPosteriors[i][indexOfBestAC] > AFresult.log10PosteriorOfAFzero) ) {
altAllelesToUse[i] = true;
bestGuessIsRef = false;
}
@ -332,35 +331,28 @@ public class UnifiedGenotyperEngine {
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
altAllelesToUse[i] = true;
}
// keep track of the "best" alternate allele to use
if ( indexOfBestAC > alleleCountOfHighestAlt) {
alleleCountOfHighestAlt = indexOfBestAC;
indexOfHighestAlt = i;
}
}
// calculate p(f>0)
// TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[indexOfHighestAlt]);
double sum = 0.0;
for (int i = 1; i <= N; i++)
sum += normalizedPosteriors[i];
double PofF = Math.min(sum, 1.0); // deal with precision errors
// calculate p(f>0):
// because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them
final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
final double PofF = 1.0 - normalizedPosteriors[0];
double phredScaledConfidence;
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
phredScaledConfidence = -10.0 * AFresult.log10PosteriorOfAFzero;
} else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
if ( Double.isInfinite(phredScaledConfidence) ) {
sum = 0.0;
double sum = AFresult.log10AlleleFrequencyPosteriors[0][0];
if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
sum = 0.0;
for (int i = 1; i <= N; i++) {
if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
if ( AFresult.log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
break;
sum += alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i];
sum += AFresult.log10AlleleFrequencyPosteriors[0][i];
}
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
}
@ -374,7 +366,7 @@ public class UnifiedGenotyperEngine {
}
// strip out any alleles that aren't going to be used in the VariantContext
List<Allele> myAlleles;
final List<Allele> myAlleles;
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
myAlleles.add(vc.getReference());
@ -388,8 +380,8 @@ public class UnifiedGenotyperEngine {
}
// start constructing the resulting VC
GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
builder.log10PError(phredScaledConfidence/-10.0);
if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filters(filter);
@ -400,14 +392,14 @@ public class UnifiedGenotyperEngine {
}
// create the genotypes
GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles);
final GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles);
// print out stats if we have a writer
if ( verboseWriter != null && !limitedContext )
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
// *** note that calculating strand bias involves overwriting data structures, so we do that last
HashMap<String, Object> attributes = new HashMap<String, Object>();
final HashMap<String, Object> attributes = new HashMap<String, Object>();
// if the site was downsampled, record that fact
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
@ -418,31 +410,31 @@ public class UnifiedGenotyperEngine {
// the overall lod
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
double overallLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double forwardLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
double forwardLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero;
double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
double reverseLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
double reverseLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero;
double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
@ -455,7 +447,8 @@ public class UnifiedGenotyperEngine {
strandScore *= 10.0;
//logger.debug(String.format("SLOD=%f", strandScore));
attributes.put("SB", strandScore);
if ( !Double.isNaN(strandScore) )
attributes.put("SB", strandScore);
}
// finish constructing the resulting VC
@ -478,6 +471,12 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
private double[] generateNormalizedPosteriors(AlleleFrequencyCalculationResult AFresult, double[] normalizedPosteriors) {
normalizedPosteriors[0] = AFresult.log10PosteriorOfAFzero;
System.arraycopy(AFresult.log10AlleleFrequencyPosteriors[0], 0, normalizedPosteriors, 1, normalizedPosteriors.length-1);
return MathUtils.normalizeFromLog10(normalizedPosteriors);
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
Map<String, AlignmentContext> stratifiedContexts = null;

View File

@ -175,7 +175,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
/////////////////////////////
// Debug Arguments
/////////////////////////////
@Hidden
@Advanced
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
protected Boolean TRUST_ALL_POLYMORPHIC = false;
//@Hidden

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
@ -140,7 +142,7 @@ import java.util.*;
* -R ref.fasta \
* -T SelectVariants \
* --variant input.vcf \
* -family NA12891+NA12892=NA12878 \
* -bed family.ped \
* -mvq 50 \
* -o violations.vcf
*
@ -250,16 +252,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
@Hidden
@Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false)
private File FAMILY_STRUCTURE_FILE = null;
/**
* String formatted as dad+mom=child where these parameters determine which sample names are examined.
*/
@Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
private String FAMILY_STRUCTURE = "";
/**
* This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure.
*/
@ -286,13 +278,21 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
private double fractionGenotypes = 0;
/**
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
* When specified one or more times, a particular type of variant is selected.
*
*/
*/
@Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
/**
* If provided, we will only include variants whose ID field is present in this list of ids. The matching
* is exact string matching. The file format is just one ID per line
*
*/
@Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false)
private File rsIDFile = null;
@Hidden
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
@ -313,9 +313,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
}
public enum NumberAlleleRestriction {
ALL,
BIALLELIC,
MULTIALLELIC
ALL,
BIALLELIC,
MULTIALLELIC
}
private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
@ -339,17 +339,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
private int positionToAdd = 0;
private RandomVariantStructure [] variantArray;
/* Variables used for random selection with AF boosting */
private ArrayList<Double> afBreakpoints = null;
private ArrayList<Double> afBoosts = null;
double bkDelta = 0.0;
private PrintStream outMVFileStream = null;
//Random number generator for the genotypes to remove
//Random number generator for the genotypes to remove
private Random randomGenotypes = new Random();
private Set<String> IDsToKeep = null;
/**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
*/
@ -437,7 +433,18 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track");
/** load in the IDs file to a hashset for matching */
if ( rsIDFile != null ) {
IDsToKeep = new HashSet<String>();
try {
for ( final String line : new XReadLines(rsIDFile).readLines() ) {
IDsToKeep.add(line.trim());
}
logger.info("Selecting only variants with one of " + IDsToKeep.size() + " IDs from " + rsIDFile);
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(rsIDFile, e);
}
}
}
/**
@ -460,20 +467,23 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
}
for (VariantContext vc : vcs) {
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
continue;
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
break;
break;
if (outMVFile != null){
for( String familyId : mv.getViolationFamilies()){
for(Sample sample : this.getSampleDB().getFamily(familyId)){
if(sample.getParents().size() > 0){
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
}
}
@ -483,12 +493,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
if (DISCORDANCE_ONLY) {
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
if (!isDiscordant(vc, compVCs))
return 0;
continue;
}
if (CONCORDANCE_ONLY) {
Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation());
if (!isConcordant(vc, compVCs))
return 0;
continue;
}
if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
@ -502,21 +512,22 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
VariantContext sub = subsetRecord(vc, samples);
if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
boolean failedJexlMatch = false;
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
if ( !VariantContextUtils.match(sub, jexl) ) {
return 0;
failedJexlMatch = true;
break;
}
}
if (SELECT_RANDOM_NUMBER) {
randomlyAddVariant(++variantNumber, sub);
if ( !failedJexlMatch ) {
if (SELECT_RANDOM_NUMBER) {
randomlyAddVariant(++variantNumber, sub);
}
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
vcfWriter.add(sub);
}
}
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
vcfWriter.add(sub);
}
}
}
return 1;
@ -647,7 +658,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
if ( vc.getAlleles().size() != sub.getAlleles().size() )
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
//Remove a fraction of the genotypes if needed
if(fractionGenotypes>0){
@ -655,10 +666,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(Allele.create((byte)'.'));
alleles.add(Allele.create((byte)'.'));
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
alleles.add(Allele.create((byte)'.'));
alleles.add(Allele.create((byte)'.'));
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
}
else{
genotypes.add(genotype);

View File

@ -127,12 +127,12 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
* multi-allelic INFO field values can be lists of values.
*/
@Advanced
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
public boolean keepMultiAllelic = false;
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
public boolean keepMultiAllelic = false;
@Hidden
@Argument(fullName="logACSum", shortName="logACSum", doc="Log sum of AC instead of max value in case of multiallelic variants", required=false)
public boolean logACSum = false;
public boolean logACSum = false;
/**
* By default, this tool throws a UserException when it encounters a field without a value in some record. This

View File

@ -109,7 +109,7 @@ public class RScriptExecutor {
List<File> tempFiles = new ArrayList<File>();
try {
File tempLibDir = IOUtils.tempDir("R.", ".lib");
File tempLibDir = IOUtils.tempDir("Rlib.", "");
tempFiles.add(tempLibDir);
StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibDir).append("';");

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads;
package org.broadinstitute.sting.utils.clipping;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
@ -460,17 +460,20 @@ public class ClippingOp {
int newShift = 0;
int oldShift = 0;
boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
for (CigarElement cigarElement : newCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
newShift += cigarElement.getLength();
else
else {
readHasStarted = true;
break;
}
}
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
else
else if (readHasStarted)
break;
}
return newShift - oldShift;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads;
package org.broadinstitute.sting.utils.clipping;
/**
* How should we represent a clipped bases in a read?

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads;
package org.broadinstitute.sting.utils.clipping;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
@ -63,12 +63,18 @@ public class ReadClipper {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
if ( start > stop )
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
if ( start > 0 && stop < read.getReadLength() - 1)
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
this.addOp(new ClippingOp(start, stop));
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
@ -76,6 +82,9 @@ public class ReadClipper {
}
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
@ -195,16 +204,12 @@ public class ReadClipper {
for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)
cigarElement.getOperator() != CigarOperator.INSERTION)
break;
else if (cigarElement.getOperator() == CigarOperator.INSERTION) {
else if (cigarElement.getOperator() == CigarOperator.INSERTION)
this.addOp(new ClippingOp(0, cigarElement.getLength() - 1));
}
else if (cigarElement.getOperator() == CigarOperator.DELETION) {
throw new ReviewedStingException("No read should start with a deletion. Aligner bug?");
}
}
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}

View File

@ -79,7 +79,7 @@ public class IOUtils {
tempDirParent = FileUtils.getTempDirectory();
if (!tempDirParent.exists() && !tempDirParent.mkdirs())
throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent);
File temp = File.createTempFile(prefix + "-", suffix, tempDirParent);
File temp = File.createTempFile(prefix, suffix, tempDirParent);
if (!temp.delete())
throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath());
if (!temp.mkdir())

View File

@ -619,8 +619,9 @@ public class VariantContextUtils {
if (vc.alleles.size() == 1)
continue;
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
if ( ! genotypes.isEmpty() )
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
genotypes = stripPLs(genotypes);
// this will remove stale AC,AF attributed from vc
calculateChromosomeCounts(vc, attributes, true);

View File

@ -1,18 +1,12 @@
package org.broadinstitute.sting;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.*;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.testng.Assert;
import org.broadinstitute.sting.utils.io.IOUtils;
import javax.swing.*;
import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
/**
@ -78,8 +72,8 @@ public abstract class BaseTest {
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
public static final String networkTempDir = "/broad/shptmp/" + System.getProperty("user.name") + "/";
public static final File networkTempDirFile = new File(networkTempDir);
public static final String networkTempDir;
public static final File networkTempDirFile;
public static final File testDirFile = new File("public/testdata/");
public static final String testDir = testDirFile.getAbsolutePath() + "/";
@ -99,6 +93,10 @@ public abstract class BaseTest {
// Set the Root logger to only output warnings.
logger.setLevel(Level.WARN);
networkTempDirFile = IOUtils.tempDir("temp.", ".dir", new File("/broad/shptmp/" + System.getProperty("user.name")));
networkTempDirFile.deleteOnExit();
networkTempDir = networkTempDirFile.getAbsolutePath() + "/";
// find our file sources
// if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) {
// logger.fatal("We can't locate the reference directories. Aborting!");
@ -233,18 +231,12 @@ public abstract class BaseTest {
/**
* Creates a temp file that will be deleted on exit after tests are complete.
* @param name Prefix of the file.
* @param extension Extension to concat to the end of the file.
* @return A file in the network temporary directory starting with name, ending with extension, which will be deleted after the program exits.
* @param name Name of the file.
* @return A file in the network temporary directory with name, which will be deleted after the program exits.
*/
public static File createNetworkTempFile(String name, String extension) {
try {
FileUtils.forceMkdir(networkTempDirFile);
File file = File.createTempFile(name, extension, networkTempDirFile);
file.deleteOnExit();
return file;
} catch (IOException ex) {
throw new ReviewedStingException("Cannot create temp file: " + ex.getMessage(), ex);
}
public static File createNetworkTempFile(String name) {
File file = new File(networkTempDirFile, name);
file.deleteOnExit();
return file;
}
}

View File

@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testTDTAnnotation() {
final String MD5 = "9fe37b61aab695ad47ce3c587148e91f";
final String MD5 = "204e67536a17af7eaa6bf0a910818997";
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,

View File

@ -97,7 +97,12 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]);
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
if ( result.log10AlleleFrequencyPosteriors[0][0] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) {
Assert.assertTrue(calculatedAlleleCount == expectedAlleleCount || result.log10AlleleFrequencyPosteriors[0][calculatedAlleleCount] < result.log10PosteriorOfAFzero);
} else {
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
}
}
}
}

View File

@ -115,7 +115,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCallingParameters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" );
e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" );
e.put( "--computeSLOD", "6172d2f3d370132f4c57a26aa94c256e" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testOutputParameter() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "274bd9d1b9c7857690fa5f0228ffc6d7" );
e.put( "--output_mode EMIT_ALL_SITES", "594c6d3c48bbc73289de7727d768644d" );
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "d971d392956aea69c3707da64d24485b" );
e.put( "--output_mode EMIT_ALL_SITES", "21993e71ca1a06a0d1f11d58e3cc26c3" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead"));
Arrays.asList("8bd5028cf294850b8a95b3c0af23d728"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51"));
Arrays.asList("814dcd66950635a870602d90a1618cce"));
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}

View File

@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testUsingDbsnpName--" + testFile, spec);
}
@Test
public void testMultipleRecordsAtOnePosition() {
String testFile = validationDataLocation + "selectVariants.onePosition.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("20b52c96f5c48258494d072752b53693")
);
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
}
@Test
public void testParallelization() {
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";

View File

@ -62,7 +62,7 @@ public class JnaSessionIntegrationTest extends BaseTest {
return;
}
File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out");
File outFile = createNetworkTempFile("JnaSessionIntegrationTest.out");
Session session = factory.getSession();
session.init(null);
try {

View File

@ -86,7 +86,7 @@ public class LibDrmaaIntegrationTest extends BaseTest {
@Test(dependsOnMethods = { "testDrmaa" })
public void testSubmitEcho() throws Exception {
if (implementation.indexOf("LSF") >= 0) {
if (implementation.contains("LSF")) {
System.err.println(" *********************************************************");
System.err.println(" ***********************************************************");
System.err.println(" **** ****");
@ -101,7 +101,7 @@ public class LibDrmaaIntegrationTest extends BaseTest {
Memory error = new Memory(LibDrmaa.DRMAA_ERROR_STRING_BUFFER);
int errnum;
File outFile = createNetworkTempFile("LibDrmaaIntegrationTest-", ".out");
File outFile = createNetworkTempFile("LibDrmaaIntegrationTest.out");
errnum = LibDrmaa.drmaa_init(null, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN);

View File

@ -93,7 +93,7 @@ public class LibBatIntegrationTest extends BaseTest {
@Test
public void testSubmitEcho() throws Exception {
String queue = "hour";
File outFile = createNetworkTempFile("LibBatIntegrationTest-", ".out");
File outFile = createNetworkTempFile("LibBatIntegrationTest.out");
submit req = new submit();

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.utils.clipreads;
package org.broadinstitute.sting.utils.clipping;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import java.util.*;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
/**
* Created by IntelliJ IDEA.
@ -17,7 +19,7 @@ import java.util.*;
* Time: 6:45 AM
* To change this template use File | Settings | File Templates.
*/
public class ClipReadsTestUtils {
public class ReadClipperTestUtils {
//Should contain all the utils needed for tests to mass produce
//reads, cigars, and other needed classes
@ -176,4 +178,16 @@ public class ClipReadsTestUtils {
Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
}
public static Cigar invertCigar (Cigar cigar) {
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
for (CigarElement cigarElement : cigar.getCigarElements())
cigarStack.push(cigarElement);
Cigar invertedCigar = new Cigar();
while (!cigarStack.isEmpty())
invertedCigar.add(cigarStack.pop());
return invertedCigar;
}
}

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.clipreads;
package org.broadinstitute.sting.utils.clipping;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
@ -32,75 +32,119 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 9/28/11
* Time: 9:54 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadClipperUnitTest extends BaseTest {
// TODO: exception testing, make cases that should fail will fail
// TODO: add indels to all test cases
List<Cigar> cigarList;
int maximumCigarSize = 10;
int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
@BeforeClass
public void init() {
cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize);
cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize);
}
@Test(enabled = true)
public void testHardClipBothEndsByReferenceCoordinates() {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
int readLength = alnStart - alnEnd;
for (int i=0; i<readLength/2; i++) {
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(alnStart + i, alnEnd - i);
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
}
}
}
@Test(enabled = true)
public void testHardClipByReadCoordinates() {
logger.warn("Executing testHardClipByReadCoordinates");
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength();
for (int i=0; i<readLength; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReadCoordinates(0, i);
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1);
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
}
}
}
@Test(enabled = true)
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
for (int i=alnStart; i<=alnEnd; i++) {
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
if (!clipLeft.isEmpty())
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
}
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
}
}
}
}
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesLeftTail() {
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
if (!clipLeft.isEmpty())
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
}
}
}
}
@Test(enabled = true)
public void testHardClipByReferenceCoordinatesRightTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int alnStart = read.getAlignmentStart();
int alnEnd = read.getAlignmentEnd();
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
for (int i=alnStart; i<=alnEnd; i++) {
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
}
}
}
}
@Test(enabled = true)
public void testHardClipLowQualEnds() {
logger.warn("Executing testHardClipLowQualEnds");
final byte LOW_QUAL = 2;
final byte HIGH_QUAL = 30;
// create a read for every cigar permutation
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
int readLength = read.getReadLength();
byte [] quals = new byte[readLength];
@ -116,7 +160,7 @@ public class ReadClipperUnitTest extends BaseTest {
// Tests
// Make sure the low qualities are gone
testNoLowQualBases(clipLeft, LOW_QUAL);
assertNoLowQualBases(clipLeft, LOW_QUAL);
// Can't run this test with the current contract of no hanging insertions
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
@ -131,7 +175,7 @@ public class ReadClipperUnitTest extends BaseTest {
// Tests
// Make sure the low qualities are gone
testNoLowQualBases(clipRight, LOW_QUAL);
assertNoLowQualBases(clipRight, LOW_QUAL);
// Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
@ -149,7 +193,7 @@ public class ReadClipperUnitTest extends BaseTest {
// Tests
// Make sure the low qualities are gone
testNoLowQualBases(clipBoth, LOW_QUAL);
assertNoLowQualBases(clipBoth, LOW_QUAL);
// Can't run this test with the current contract of no hanging insertions
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
@ -158,11 +202,7 @@ public class ReadClipperUnitTest extends BaseTest {
// logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
}
// ONE OFF Testing clipping that ends inside an insertion
// ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug )
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
final String CIGAR = "1S1M5I1S";
@ -176,15 +216,7 @@ public class ReadClipperUnitTest extends BaseTest {
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
ReadClipper lowQualClipper = new ReadClipper(read);
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
}
private void testNoLowQualBases(GATKSAMRecord read, byte low_qual) {
if (!read.isEmpty()) {
byte [] quals = read.getBaseQualities();
for (int i=0; i<quals.length; i++)
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
}
ReadClipperTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
}
@Test(enabled = true)
@ -192,7 +224,7 @@ public class ReadClipperUnitTest extends BaseTest {
// Generate a list of cigars to test
for (Cigar cigar : cigarList) {
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
ReadClipper readClipper = new ReadClipper(read);
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
@ -239,4 +271,56 @@ public class ReadClipperUnitTest extends BaseTest {
// logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
}
}
@Test(enabled = false)
public void testHardClipLeadingInsertions() {
for (Cigar cigar : cigarList) {
if (startsWithInsertion(cigar)) {
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar());
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
expectedLength -= leadingInsertionLength(ReadClipperTestUtils.invertCigar(read.getCigar()));
if (! clippedRead.isEmpty()) {
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
}
else
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
}
}
}
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
if (!read.isEmpty()) {
byte [] quals = read.getBaseQualities();
for (int i=0; i<quals.length; i++)
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
}
}
private boolean startsWithInsertion(Cigar cigar) {
return leadingInsertionLength(cigar) > 0;
}
private int leadingInsertionLength(Cigar cigar) {
for (CigarElement cigarElement : cigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.INSERTION)
return cigarElement.getLength();
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
break;
}
return 0;
}
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) {
for (CigarElement cigarElement : cigar.getCigarElements())
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
return true;
return false;
}
}

View File

@ -1,67 +0,0 @@
package org.broadinstitute.sting.utils.clipreads;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 11/27/11
* Time: 5:17 AM
* To change this template use File | Settings | File Templates.
*/
public class ClippingOpUnitTest extends BaseTest {
ClippingOp clippingOp;
GATKSAMRecord read;
@BeforeTest
public void init() {
read = ClipReadsTestUtils.makeRead();
}
@Test
private void testHardClip() {
// List<TestParameter> testList = new LinkedList<TestParameter>();
// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
//
// for (TestParameter p : testList) {
// init();
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
// p.cigar);
// }
}
@Test
private void testSoftClip() {
// List<TestParameter> testList = new LinkedList<TestParameter>();
// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
//
// for (TestParameter p : testList) {
// init();
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
// }
}
}

View File

@ -19,7 +19,7 @@ class ExampleCountLoci extends QScript {
@Output
var out: File = _
def script = {
def script() {
val countLoci = new CountLoci
countLoci.reference_sequence = referenceFile
countLoci.input_file = bamFiles

View File

@ -24,7 +24,7 @@ class ExampleCountReads extends QScript {
/**
* In script, you create and then add() functions to the pipeline.
*/
def script = {
def script() {
// Run CountReads for all bams jointly.
@ -41,6 +41,9 @@ class ExampleCountReads extends QScript {
// matches the full form of the argument, but will actually be a scala List[]
jointCountReads.input_file = bamFiles
// Set the memory limit. Also acts as a memory request on LSF and GridEngine.
jointCountReads.memoryLimit = 1
// Add the newly created function to the pipeline.
add(jointCountReads)
@ -51,6 +54,7 @@ class ExampleCountReads extends QScript {
singleCountReads.reference_sequence = referenceFile
// ':+' is the scala List append operator
singleCountReads.input_file :+= bamFile
singleCountReads.memoryLimit = 1
add(singleCountReads)
}
}

View File

@ -24,7 +24,7 @@ class ExampleCustomWalker extends QScript {
/**
* In script, you create and then add() functions to the pipeline.
*/
def script = {
def script() {
val customWalker = new CommandLineGATK {
// Set the name of your walker, for example this will be passed as -T MyCustomWalker
this.analysis_type = "MyCustomWalker"

View File

@ -33,7 +33,6 @@ class ExampleUnifiedGenotyper extends QScript {
@Argument(doc="An optional list of filter expressions.", shortName="filterExpression", required=false)
var filterExpressions: List[String] = Nil
// This trait allows us set the variables below in one place,
// and then reuse this trait on each CommandLineGATK function below.
trait UnifiedGenotyperArguments extends CommandLineGATK {

View File

@ -89,7 +89,7 @@ class QCommandLine extends CommandLineProgram with Logging {
private var shuttingDown = false
private lazy val pluginManager = {
qScriptClasses = IOUtils.tempDir("Q-Classes", "", settings.qSettings.tempDirectory)
qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
qScriptManager.loadScripts(scripts, qScriptClasses)
new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL))
}

View File

@ -81,6 +81,10 @@ trait QJobReport extends Logging {
this.reportFeatures = features.mapValues(_.toString)
}
def addJobReportBinding(key: String, value: Any) {
this.reportFeatures += (key -> value.toString)
}
// copy the QJobReport information -- todo : what's the best way to do this?
override def copySettingsTo(function: QFunction) {
self.copySettingsTo(function)

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.BaseTest
class ExampleCountLociPipelineTest {
@Test
def testCountLoci {
def testCountLoci() {
val testOut = "count.out"
val spec = new PipelineTestSpec
spec.name = "countloci"

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.pipeline.examples
import org.testng.annotations.Test
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
import org.broadinstitute.sting.BaseTest
class ExampleCountReadsPipelineTest {
@Test
def testCountReads() {
val spec = new PipelineTestSpec
spec.name = "countreads"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -I " + BaseTest.testDir + "exampleBAM.bam").mkString
PipelineTest.executeTest(spec)
}
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.pipeline.examples
import org.testng.annotations.Test
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
import org.broadinstitute.sting.BaseTest
class ExampleUnifiedGenotyperPipelineTest {
@Test
def testUnifiedGenotyper() {
val spec = new PipelineTestSpec
spec.name = "unifiedgenotyper"
spec.args = Array(
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
" -I " + BaseTest.testDir + "exampleBAM.bam",
" -filter QD",
" -filterExpression 'QD < 2.0'").mkString
PipelineTest.executeTest(spec)
}
}