Merge branch 'master' of ssh://tin.broadinstitute.org/humgen/gsa-scr1/chartl/dev/unstable
Conflicts: private/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IntronLossGenotyperV2.java
This commit is contained in:
commit
418d22b67e
|
|
@ -12,7 +12,7 @@ if ( onCMDLine ) {
|
|||
inputFileName = args[1]
|
||||
outputPDF = args[2]
|
||||
} else {
|
||||
inputFileName = "Q-8271@gsa2.jobreport.txt"
|
||||
inputFileName = "Q-26618@gsa4.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||
outputPDF = NA
|
||||
|
|
@ -129,9 +129,11 @@ plotGroup <- function(groupTable) {
|
|||
# as above, but averaging over all iterations
|
||||
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
|
||||
if ( dim(sub)[1] > 1 ) {
|
||||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
||||
try({ # need a try here because we will fail to reduce when there's just a single iteration
|
||||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
||||
}, silent=T)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -193,6 +195,7 @@ plotJobsGantt(gatkReportData, F, F)
|
|||
plotProgressByTime(gatkReportData)
|
||||
plotTimeByHost(gatkReportData)
|
||||
for ( group in gatkReportData ) {
|
||||
print(group)
|
||||
plotGroup(group)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,6 +64,9 @@ import java.util.concurrent.*;
|
|||
public class SAMDataSource {
|
||||
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
||||
|
||||
/** If true, we will load SAMReaders in parallel */
|
||||
final private static boolean USE_PARALLEL_LOADING = false;
|
||||
|
||||
/** Backing support for reads. */
|
||||
protected final ReadProperties readProperties;
|
||||
|
||||
|
|
@ -97,11 +100,6 @@ public class SAMDataSource {
|
|||
*/
|
||||
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
|
||||
|
||||
/**
|
||||
* Cached representation of the merged header used to generate a merging iterator.
|
||||
*/
|
||||
private final SamFileHeaderMerger headerMerger;
|
||||
|
||||
/**
|
||||
* The merged header.
|
||||
*/
|
||||
|
|
@ -297,9 +295,8 @@ public class SAMDataSource {
|
|||
|
||||
initializeReaderPositions(readers);
|
||||
|
||||
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
|
||||
mergedHeader = headerMerger.getMergedHeader();
|
||||
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
|
||||
mergedHeader = readers.getMergedHeader();
|
||||
hasReadGroupCollisions = readers.hasReadGroupCollisions();
|
||||
|
||||
readProperties = new ReadProperties(
|
||||
samFiles,
|
||||
|
|
@ -324,9 +321,9 @@ public class SAMDataSource {
|
|||
|
||||
List<SAMReadGroupRecord> readGroups = reader.getFileHeader().getReadGroups();
|
||||
for(SAMReadGroupRecord readGroup: readGroups) {
|
||||
if(headerMerger.hasReadGroupCollisions()) {
|
||||
mappingToMerged.put(readGroup.getReadGroupId(),headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()));
|
||||
mergedToOriginalReadGroupMappings.put(headerMerger.getReadGroupId(reader,readGroup.getReadGroupId()),readGroup.getReadGroupId());
|
||||
if(hasReadGroupCollisions) {
|
||||
mappingToMerged.put(readGroup.getReadGroupId(),readers.getReadGroupId(id,readGroup.getReadGroupId()));
|
||||
mergedToOriginalReadGroupMappings.put(readers.getReadGroupId(id,readGroup.getReadGroupId()),readGroup.getReadGroupId());
|
||||
} else {
|
||||
mappingToMerged.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
||||
mergedToOriginalReadGroupMappings.put(readGroup.getReadGroupId(),readGroup.getReadGroupId());
|
||||
|
|
@ -559,7 +556,7 @@ public class SAMDataSource {
|
|||
*/
|
||||
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
|
||||
// Set up merging to dynamically merge together multiple BAMs.
|
||||
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
|
||||
MergingSamRecordIterator mergingIterator = readers.createMergingIterator();
|
||||
|
||||
for(SAMReaderID id: getReaderIDs()) {
|
||||
CloseableIterator<SAMRecord> iterator = null;
|
||||
|
|
@ -704,6 +701,11 @@ public class SAMDataSource {
|
|||
* A collection of readers derived from a reads metadata structure.
|
||||
*/
|
||||
private class SAMReaders implements Iterable<SAMFileReader> {
|
||||
/**
|
||||
* Cached representation of the merged header used to generate a merging iterator.
|
||||
*/
|
||||
private final SamFileHeaderMerger headerMerger;
|
||||
|
||||
/**
|
||||
* Internal storage for a map of id -> reader.
|
||||
*/
|
||||
|
|
@ -720,68 +722,133 @@ public class SAMDataSource {
|
|||
* @param validationStringency validation stringency.
|
||||
*/
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||
final int N_THREADS = 8;
|
||||
int totalNumberOfFiles = readerIDs.size();
|
||||
final int totalNumberOfFiles = readerIDs.size();
|
||||
int readerNumber = 1;
|
||||
final SimpleTimer timer = new SimpleTimer().start();
|
||||
|
||||
ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
|
||||
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
|
||||
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
|
||||
for (SAMReaderID readerID: readerIDs) {
|
||||
logger.debug("Enqueuing for initialization: " + readerID.samFile);
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID);
|
||||
inits.add(init);
|
||||
futures.add(executor.submit(init));
|
||||
}
|
||||
|
||||
final SimpleTimer timer = new SimpleTimer();
|
||||
try {
|
||||
final int MAX_WAIT = 30 * 1000;
|
||||
final int MIN_WAIT = 1 * 1000;
|
||||
|
||||
timer.start();
|
||||
while ( ! futures.isEmpty() ) {
|
||||
final int prevSize = futures.size();
|
||||
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
|
||||
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
|
||||
Thread.sleep(waitTimeInMS);
|
||||
|
||||
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
|
||||
for ( final Future<ReaderInitializer> initFuture : futures ) {
|
||||
if ( initFuture.isDone() ) {
|
||||
final ReaderInitializer init = initFuture.get();
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
|
||||
readers.put(init.readerID, init.reader);
|
||||
} else {
|
||||
pending.add(initFuture);
|
||||
}
|
||||
if ( totalNumberOfFiles > 0 ) logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial"));
|
||||
if ( ! USE_PARALLEL_LOADING ) {
|
||||
final int tickSize = 50;
|
||||
int nExecutedTotal = 0;
|
||||
long lastTick = timer.currentTime();
|
||||
for(final SAMReaderID readerID: readerIDs) {
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID).call();
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
|
||||
final int pendingSize = pending.size();
|
||||
final int nExecutedInTick = prevSize - pendingSize;
|
||||
final int nExecutedTotal = totalNumberOfFiles - pendingSize;
|
||||
final double totalTimeInSeconds = timer.getElapsedTime();
|
||||
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
|
||||
final int nRemaining = pendingSize;
|
||||
final double estTimeToComplete = pendingSize / nTasksPerSecond;
|
||||
logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
|
||||
nExecutedInTick, (int)(waitTimeInMS / 1000.0),
|
||||
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
||||
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
||||
|
||||
futures = pending;
|
||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
|
||||
readers.put(init.readerID,init.reader);
|
||||
if ( ++nExecutedTotal % tickSize == 0) {
|
||||
double tickInSec = (timer.currentTime() - lastTick) / 1000.0;
|
||||
printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInSec);
|
||||
lastTick = timer.currentTime();
|
||||
}
|
||||
}
|
||||
} catch ( InterruptedException e ) {
|
||||
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
|
||||
} catch ( ExecutionException e ) {
|
||||
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
|
||||
} else {
|
||||
final int N_THREADS = 8;
|
||||
|
||||
final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
|
||||
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
|
||||
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
|
||||
for (final SAMReaderID readerID: readerIDs) {
|
||||
logger.debug("Enqueuing for initialization: " + readerID.samFile);
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID);
|
||||
inits.add(init);
|
||||
futures.add(executor.submit(init));
|
||||
}
|
||||
|
||||
try {
|
||||
final int MAX_WAIT = 30 * 1000;
|
||||
final int MIN_WAIT = 1 * 1000;
|
||||
|
||||
while ( ! futures.isEmpty() ) {
|
||||
final int prevSize = futures.size();
|
||||
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
|
||||
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
|
||||
Thread.sleep(waitTimeInMS);
|
||||
|
||||
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
|
||||
for ( final Future<ReaderInitializer> initFuture : futures ) {
|
||||
if ( initFuture.isDone() ) {
|
||||
final ReaderInitializer init = initFuture.get();
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
|
||||
readers.put(init.readerID, init.reader);
|
||||
} else {
|
||||
pending.add(initFuture);
|
||||
}
|
||||
}
|
||||
|
||||
final int nExecutedTotal = totalNumberOfFiles - pending.size();
|
||||
final int nExecutedInTick = prevSize - pending.size();
|
||||
printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, waitTimeInMS / 1000.0);
|
||||
futures = pending;
|
||||
}
|
||||
} catch ( InterruptedException e ) {
|
||||
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
|
||||
} catch ( ExecutionException e ) {
|
||||
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
|
||||
}
|
||||
|
||||
executor.shutdown();
|
||||
}
|
||||
|
||||
logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
||||
executor.shutdown();
|
||||
if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
||||
|
||||
Collection<SAMFileHeader> headers = new LinkedList<SAMFileHeader>();
|
||||
for(SAMFileReader reader: readers.values())
|
||||
headers.add(reader.getFileHeader());
|
||||
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
|
||||
}
|
||||
|
||||
final private void printReaderPerformance(final int nExecutedTotal,
|
||||
final int nExecutedInTick,
|
||||
final int totalNumberOfFiles,
|
||||
final SimpleTimer timer,
|
||||
final double tickDurationInSec) {
|
||||
final int pendingSize = totalNumberOfFiles - nExecutedTotal;
|
||||
final double totalTimeInSeconds = timer.getElapsedTime();
|
||||
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
|
||||
final int nRemaining = pendingSize;
|
||||
final double estTimeToComplete = pendingSize / nTasksPerSecond;
|
||||
logger.info(String.format("Init %d BAMs in last %.2f s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
|
||||
nExecutedInTick, tickDurationInSec,
|
||||
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
||||
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the header derived from the merging of these BAM files.
|
||||
* @return the merged header.
|
||||
*/
|
||||
public SAMFileHeader getMergedHeader() {
|
||||
return headerMerger.getMergedHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Do multiple read groups collide in this dataset?
|
||||
* @return True if multiple read groups collide; false otherwis.
|
||||
*/
|
||||
public boolean hasReadGroupCollisions() {
|
||||
return headerMerger.hasReadGroupCollisions();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the newly mapped read group ID for the given read group.
|
||||
* @param readerID Reader for which to discern the transformed ID.
|
||||
* @param originalReadGroupID Original read group.
|
||||
* @return Remapped read group.
|
||||
*/
|
||||
public String getReadGroupId(final SAMReaderID readerID, final String originalReadGroupID) {
|
||||
SAMFileHeader header = readers.get(readerID).getFileHeader();
|
||||
return headerMerger.getReadGroupId(header,originalReadGroupID);
|
||||
}
|
||||
|
||||
public MergingSamRecordIterator createMergingIterator() {
|
||||
return new MergingSamRecordIterator(headerMerger,readers.values(),true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -833,25 +900,6 @@ public class SAMDataSource {
|
|||
public boolean isEmpty() {
|
||||
return readers.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the actual readers out of this data structure.
|
||||
* @return A collection of the readers.
|
||||
*/
|
||||
public Collection<SAMFileReader> values() {
|
||||
return readers.values();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the actual readers out of this data structure.
|
||||
* @return A collection of the readers.
|
||||
*/
|
||||
public Collection<SAMFileHeader> headers() {
|
||||
ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
|
||||
for (SAMFileReader reader : values())
|
||||
headers.add(reader.getFileHeader());
|
||||
return headers;
|
||||
}
|
||||
}
|
||||
|
||||
class ReaderInitializer implements Callable<ReaderInitializer> {
|
||||
|
|
|
|||
|
|
@ -38,9 +38,9 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.clipreads.ClippingOp;
|
||||
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingOp;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
|
|||
|
|
@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
|||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
|
||||
|
||||
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
|
||||
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
|
||||
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET);
|
||||
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET);
|
||||
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
|
||||
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
|
||||
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
|
||||
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
|
||||
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF);
|
||||
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF);
|
||||
|
||||
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
||||
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
||||
return (numer * numer) / denom;
|
||||
}
|
||||
|
||||
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
||||
final double likelihoodVector[] = new double[triosToTest.size() * 2];
|
||||
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) {
|
||||
final double likelihoodVector[] = new double[triosToTest.size()];
|
||||
int iii = 0;
|
||||
for( final Sample child : triosToTest ) {
|
||||
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
|
||||
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
|
||||
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
|
||||
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
|
||||
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
|
||||
likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx];
|
||||
}
|
||||
|
||||
return MathUtils.sumLog10(likelihoodVector);
|
||||
|
|
|
|||
|
|
@ -170,6 +170,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||
public double minGenotypeQualityP = 0.0;
|
||||
|
||||
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp tracks that exactly match both reference and alternate alleles will be counted as concordant", required=false)
|
||||
private boolean requireStrictAlleleMatch = false;
|
||||
|
||||
private VariantAnnotatorEngine engine;
|
||||
|
||||
private Collection<VariantContext> indelBufferContext;
|
||||
|
|
@ -194,11 +197,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
System.exit(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SampleDB getSampleDB() {
|
||||
return super.getSampleDB();
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare the output file and the list of available features.
|
||||
*/
|
||||
|
|
@ -216,6 +214,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
else
|
||||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
engine.initializeExpressions(expressionsToUse);
|
||||
engine.setRequireStrictAlleleMatch(requireStrictAlleleMatch);
|
||||
|
||||
// setup the header fields
|
||||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||
|
|
|
|||
|
|
@ -47,9 +47,11 @@ public class VariantAnnotatorEngine {
|
|||
private List<GenotypeAnnotation> requestedGenotypeAnnotations;
|
||||
private List<VAExpression> requestedExpressions = new ArrayList<VAExpression>();
|
||||
|
||||
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
|
||||
private AnnotatorCompatibleWalker walker;
|
||||
private GenomeAnalysisEngine toolkit;
|
||||
private final HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
|
||||
private final AnnotatorCompatibleWalker walker;
|
||||
private final GenomeAnalysisEngine toolkit;
|
||||
|
||||
private boolean requireStrictAlleleMatch = false;
|
||||
|
||||
protected static class VAExpression {
|
||||
|
||||
|
|
@ -163,6 +165,10 @@ public class VariantAnnotatorEngine {
|
|||
return descriptions;
|
||||
}
|
||||
|
||||
public void setRequireStrictAlleleMatch( final boolean requireStrictAlleleMatch ) {
|
||||
this.requireStrictAlleleMatch = requireStrictAlleleMatch;
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
|
||||
|
|
@ -197,7 +203,7 @@ public class VariantAnnotatorEngine {
|
|||
} else {
|
||||
boolean overlapsComp = false;
|
||||
for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) {
|
||||
if ( !comp.isFiltered() ) {
|
||||
if ( !comp.isFiltered() && ( !requireStrictAlleleMatch || comp.getAlleles().equals(vc.getAlleles()) ) ) {
|
||||
overlapsComp = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,9 +34,20 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
*/
|
||||
public class AlleleFrequencyCalculationResult {
|
||||
|
||||
// IMPORTANT NOTE:
|
||||
// These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N).
|
||||
// For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that
|
||||
// the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may
|
||||
// be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0).
|
||||
// In the bi-allelic case (where there are no other alternate alleles over which to marginalize),
|
||||
// the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED.
|
||||
final double[][] log10AlleleFrequencyLikelihoods;
|
||||
final double[][] log10AlleleFrequencyPosteriors;
|
||||
|
||||
// These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
|
||||
double log10LikelihoodOfAFzero = 0.0;
|
||||
double log10PosteriorOfAFzero = 0.0;
|
||||
|
||||
AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) {
|
||||
log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1];
|
||||
log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1];
|
||||
|
|
|
|||
|
|
@ -407,14 +407,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
nonRefAlleles++;
|
||||
}
|
||||
|
||||
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
|
||||
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||
int AC = set.ACcounts.getCounts()[i];
|
||||
result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
|
||||
// for k=0, we don't want to put that value into the likelihoods/posteriors matrix, but instead want to set the value in the results object
|
||||
if ( nonRefAlleles == 0 ) {
|
||||
result.log10LikelihoodOfAFzero = log10LofK;
|
||||
result.log10PosteriorOfAFzero = log10LofK + log10AlleleFrequencyPriors[0][0];
|
||||
} else {
|
||||
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
|
||||
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||
int AC = set.ACcounts.getCounts()[i];
|
||||
result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
|
||||
|
||||
// for k=0 we still want to use theta
|
||||
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
|
||||
result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
|
||||
final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
|
||||
result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -75,8 +75,9 @@ public class UnifiedGenotyperEngine {
|
|||
// the model used for calculating p(non-ref)
|
||||
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
||||
|
||||
// the allele frequency likelihoods (allocated once as an optimization)
|
||||
// the allele frequency likelihoods and posteriors (allocated once as an optimization)
|
||||
private ThreadLocal<AlleleFrequencyCalculationResult> alleleFrequencyCalculationResult = new ThreadLocal<AlleleFrequencyCalculationResult>();
|
||||
private ThreadLocal<double[]> posteriorsArray = new ThreadLocal<double[]>();
|
||||
|
||||
// because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
|
||||
private final double[][] log10AlleleFrequencyPriorsSNPs;
|
||||
|
|
@ -289,7 +290,9 @@ public class UnifiedGenotyperEngine {
|
|||
if ( afcm.get() == null ) {
|
||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||
alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N));
|
||||
posteriorsArray.set(new double[N + 2]);
|
||||
}
|
||||
AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
|
||||
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
|
||||
|
|
@ -307,24 +310,20 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
|
||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
||||
|
||||
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||
boolean bestGuessIsRef = true;
|
||||
|
||||
// which alternate allele has the highest MLE AC?
|
||||
int indexOfHighestAlt = -1;
|
||||
int alleleCountOfHighestAlt = -1;
|
||||
|
||||
// determine which alternate alleles have AF>0
|
||||
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
|
||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||
int indexOfBestAC = MathUtils.maxElementIndex(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]);
|
||||
int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[i]);
|
||||
|
||||
// if the most likely AC is not 0, then this is a good alternate allele to use
|
||||
if ( indexOfBestAC != 0 ) {
|
||||
if ( indexOfBestAC != 0 && (AFresult.log10AlleleFrequencyPosteriors[i][0] != AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED || AFresult.log10AlleleFrequencyPosteriors[i][indexOfBestAC] > AFresult.log10PosteriorOfAFzero) ) {
|
||||
altAllelesToUse[i] = true;
|
||||
bestGuessIsRef = false;
|
||||
}
|
||||
|
|
@ -332,35 +331,28 @@ public class UnifiedGenotyperEngine {
|
|||
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
altAllelesToUse[i] = true;
|
||||
}
|
||||
|
||||
// keep track of the "best" alternate allele to use
|
||||
if ( indexOfBestAC > alleleCountOfHighestAlt) {
|
||||
alleleCountOfHighestAlt = indexOfBestAC;
|
||||
indexOfHighestAlt = i;
|
||||
}
|
||||
}
|
||||
|
||||
// calculate p(f>0)
|
||||
// TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
|
||||
double[] normalizedPosteriors = MathUtils.normalizeFromLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[indexOfHighestAlt]);
|
||||
double sum = 0.0;
|
||||
for (int i = 1; i <= N; i++)
|
||||
sum += normalizedPosteriors[i];
|
||||
double PofF = Math.min(sum, 1.0); // deal with precision errors
|
||||
// calculate p(f>0):
|
||||
// because the likelihoods are marginalized for each alternate allele, we only need to compare log10PosteriorOfAFzero against any one of them
|
||||
final double[] normalizedPosteriors = generateNormalizedPosteriors(AFresult, posteriorsArray.get());
|
||||
final double PofF = 1.0 - normalizedPosteriors[0];
|
||||
|
||||
double phredScaledConfidence;
|
||||
if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
|
||||
phredScaledConfidence = -10.0 * AFresult.log10PosteriorOfAFzero;
|
||||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
sum = 0.0;
|
||||
double sum = AFresult.log10AlleleFrequencyPosteriors[0][0];
|
||||
if ( sum == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||
sum = 0.0;
|
||||
for (int i = 1; i <= N; i++) {
|
||||
if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||
if ( AFresult.log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
|
||||
break;
|
||||
sum += alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i];
|
||||
sum += AFresult.log10AlleleFrequencyPosteriors[0][i];
|
||||
}
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
}
|
||||
|
|
@ -374,7 +366,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
// strip out any alleles that aren't going to be used in the VariantContext
|
||||
List<Allele> myAlleles;
|
||||
final List<Allele> myAlleles;
|
||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
||||
myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
|
||||
myAlleles.add(vc.getReference());
|
||||
|
|
@ -388,8 +380,8 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
// start constructing the resulting VC
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
|
||||
VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
|
||||
builder.log10PError(phredScaledConfidence/-10.0);
|
||||
if ( ! passesCallThreshold(phredScaledConfidence) )
|
||||
builder.filters(filter);
|
||||
|
|
@ -400,14 +392,14 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
// create the genotypes
|
||||
GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles);
|
||||
final GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles);
|
||||
|
||||
// print out stats if we have a writer
|
||||
if ( verboseWriter != null && !limitedContext )
|
||||
printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
// if the site was downsampled, record that fact
|
||||
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
|
||||
|
|
@ -418,31 +410,31 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the overall lod
|
||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
|
||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double overallLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
|
||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||
double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double forwardLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
|
||||
double forwardLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
|
||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero;
|
||||
double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double reverseLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
|
||||
double reverseLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
|
||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero;
|
||||
double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
|
|
@ -455,7 +447,8 @@ public class UnifiedGenotyperEngine {
|
|||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
||||
attributes.put("SB", strandScore);
|
||||
if ( !Double.isNaN(strandScore) )
|
||||
attributes.put("SB", strandScore);
|
||||
}
|
||||
|
||||
// finish constructing the resulting VC
|
||||
|
|
@ -478,6 +471,12 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
private double[] generateNormalizedPosteriors(AlleleFrequencyCalculationResult AFresult, double[] normalizedPosteriors) {
|
||||
normalizedPosteriors[0] = AFresult.log10PosteriorOfAFzero;
|
||||
System.arraycopy(AFresult.log10AlleleFrequencyPosteriors[0], 0, normalizedPosteriors, 1, normalizedPosteriors.length-1);
|
||||
return MathUtils.normalizeFromLog10(normalizedPosteriors);
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
Map<String, AlignmentContext> stratifiedContexts = null;
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
/////////////////////////////
|
||||
// Debug Arguments
|
||||
/////////////////////////////
|
||||
@Hidden
|
||||
@Advanced
|
||||
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
|
||||
protected Boolean TRUST_ALL_POLYMORPHIC = false;
|
||||
//@Hidden
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.*;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -140,7 +142,7 @@ import java.util.*;
|
|||
* -R ref.fasta \
|
||||
* -T SelectVariants \
|
||||
* --variant input.vcf \
|
||||
* -family NA12891+NA12892=NA12878 \
|
||||
* -bed family.ped \
|
||||
* -mvq 50 \
|
||||
* -o violations.vcf
|
||||
*
|
||||
|
|
@ -250,16 +252,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
|
||||
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false)
|
||||
private File FAMILY_STRUCTURE_FILE = null;
|
||||
|
||||
/**
|
||||
* String formatted as dad+mom=child where these parameters determine which sample names are examined.
|
||||
*/
|
||||
@Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||
private String FAMILY_STRUCTURE = "";
|
||||
|
||||
/**
|
||||
* This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure.
|
||||
*/
|
||||
|
|
@ -286,13 +278,21 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
private double fractionGenotypes = 0;
|
||||
|
||||
/**
|
||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* When specified one or more times, a particular type of variant is selected.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
@Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
|
||||
private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
|
||||
|
||||
/**
|
||||
* If provided, we will only include variants whose ID field is present in this list of ids. The matching
|
||||
* is exact string matching. The file format is just one ID per line
|
||||
*
|
||||
*/
|
||||
@Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false)
|
||||
private File rsIDFile = null;
|
||||
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
|
||||
|
|
@ -313,9 +313,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
public enum NumberAlleleRestriction {
|
||||
ALL,
|
||||
BIALLELIC,
|
||||
MULTIALLELIC
|
||||
ALL,
|
||||
BIALLELIC,
|
||||
MULTIALLELIC
|
||||
}
|
||||
|
||||
private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
|
||||
|
|
@ -339,17 +339,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
private int positionToAdd = 0;
|
||||
private RandomVariantStructure [] variantArray;
|
||||
|
||||
|
||||
/* Variables used for random selection with AF boosting */
|
||||
private ArrayList<Double> afBreakpoints = null;
|
||||
private ArrayList<Double> afBoosts = null;
|
||||
double bkDelta = 0.0;
|
||||
|
||||
private PrintStream outMVFileStream = null;
|
||||
|
||||
//Random number generator for the genotypes to remove
|
||||
//Random number generator for the genotypes to remove
|
||||
private Random randomGenotypes = new Random();
|
||||
|
||||
private Set<String> IDsToKeep = null;
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
*/
|
||||
|
|
@ -437,7 +433,18 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track");
|
||||
|
||||
|
||||
|
||||
/** load in the IDs file to a hashset for matching */
|
||||
if ( rsIDFile != null ) {
|
||||
IDsToKeep = new HashSet<String>();
|
||||
try {
|
||||
for ( final String line : new XReadLines(rsIDFile).readLines() ) {
|
||||
IDsToKeep.add(line.trim());
|
||||
}
|
||||
logger.info("Selecting only variants with one of " + IDsToKeep.size() + " IDs from " + rsIDFile);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(rsIDFile, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -460,20 +467,23 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
|
||||
continue;
|
||||
|
||||
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
||||
break;
|
||||
break;
|
||||
|
||||
if (outMVFile != null){
|
||||
for( String familyId : mv.getViolationFamilies()){
|
||||
for(Sample sample : this.getSampleDB().getFamily(familyId)){
|
||||
if(sample.getParents().size() > 0){
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -483,12 +493,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (DISCORDANCE_ONLY) {
|
||||
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
||||
if (!isDiscordant(vc, compVCs))
|
||||
return 0;
|
||||
continue;
|
||||
}
|
||||
if (CONCORDANCE_ONLY) {
|
||||
Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation());
|
||||
if (!isConcordant(vc, compVCs))
|
||||
return 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
|
||||
|
|
@ -502,21 +512,22 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
VariantContext sub = subsetRecord(vc, samples);
|
||||
if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
||||
boolean failedJexlMatch = false;
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
||||
if ( !VariantContextUtils.match(sub, jexl) ) {
|
||||
return 0;
|
||||
failedJexlMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (SELECT_RANDOM_NUMBER) {
|
||||
randomlyAddVariant(++variantNumber, sub);
|
||||
if ( !failedJexlMatch ) {
|
||||
if (SELECT_RANDOM_NUMBER) {
|
||||
randomlyAddVariant(++variantNumber, sub);
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
@ -647,7 +658,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
|
||||
//Remove a fraction of the genotypes if needed
|
||||
if(fractionGenotypes>0){
|
||||
|
|
@ -655,10 +666,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
|
|
|
|||
|
|
@ -127,12 +127,12 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
* multi-allelic INFO field values can be lists of values.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
|
||||
public boolean keepMultiAllelic = false;
|
||||
@Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false)
|
||||
public boolean keepMultiAllelic = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="logACSum", shortName="logACSum", doc="Log sum of AC instead of max value in case of multiallelic variants", required=false)
|
||||
public boolean logACSum = false;
|
||||
public boolean logACSum = false;
|
||||
|
||||
/**
|
||||
* By default, this tool throws a UserException when it encounters a field without a value in some record. This
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ public class RScriptExecutor {
|
|||
|
||||
List<File> tempFiles = new ArrayList<File>();
|
||||
try {
|
||||
File tempLibDir = IOUtils.tempDir("R.", ".lib");
|
||||
File tempLibDir = IOUtils.tempDir("Rlib.", "");
|
||||
tempFiles.add(tempLibDir);
|
||||
|
||||
StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibDir).append("';");
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
package org.broadinstitute.sting.utils.clipping;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
|
|
@ -460,17 +460,20 @@ public class ClippingOp {
|
|||
int newShift = 0;
|
||||
int oldShift = 0;
|
||||
|
||||
boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
|
||||
for (CigarElement cigarElement : newCigar.getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||
newShift += cigarElement.getLength();
|
||||
else
|
||||
else {
|
||||
readHasStarted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
||||
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
|
||||
else
|
||||
else if (readHasStarted)
|
||||
break;
|
||||
}
|
||||
return newShift - oldShift;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
package org.broadinstitute.sting.utils.clipping;
|
||||
|
||||
/**
|
||||
* How should we represent a clipped bases in a read?
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
package org.broadinstitute.sting.utils.clipping;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.CigarElement;
|
||||
|
|
@ -63,12 +63,18 @@ public class ReadClipper {
|
|||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
|
||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
||||
return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
if ( start > stop )
|
||||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
||||
|
||||
if ( start > 0 && stop < read.getReadLength() - 1)
|
||||
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
|
||||
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
this.ops = null;
|
||||
|
|
@ -76,6 +82,9 @@ public class ReadClipper {
|
|||
}
|
||||
|
||||
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
||||
return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
|
@ -195,16 +204,12 @@ public class ReadClipper {
|
|||
|
||||
for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
|
||||
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)
|
||||
cigarElement.getOperator() != CigarOperator.INSERTION)
|
||||
break;
|
||||
|
||||
else if (cigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||
else if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
this.addOp(new ClippingOp(0, cigarElement.getLength() - 1));
|
||||
}
|
||||
|
||||
else if (cigarElement.getOperator() == CigarOperator.DELETION) {
|
||||
throw new ReviewedStingException("No read should start with a deletion. Aligner bug?");
|
||||
}
|
||||
}
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
|
@ -79,7 +79,7 @@ public class IOUtils {
|
|||
tempDirParent = FileUtils.getTempDirectory();
|
||||
if (!tempDirParent.exists() && !tempDirParent.mkdirs())
|
||||
throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent);
|
||||
File temp = File.createTempFile(prefix + "-", suffix, tempDirParent);
|
||||
File temp = File.createTempFile(prefix, suffix, tempDirParent);
|
||||
if (!temp.delete())
|
||||
throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath());
|
||||
if (!temp.mkdir())
|
||||
|
|
|
|||
|
|
@ -619,8 +619,9 @@ public class VariantContextUtils {
|
|||
if (vc.alleles.size() == 1)
|
||||
continue;
|
||||
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
|
||||
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
|
||||
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
|
||||
if ( ! genotypes.isEmpty() )
|
||||
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
|
||||
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
|
||||
genotypes = stripPLs(genotypes);
|
||||
// this will remove stale AC,AF attributed from vc
|
||||
calculateChromosomeCounts(vc, attributes, true);
|
||||
|
|
|
|||
|
|
@ -1,18 +1,12 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.log4j.*;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||
|
||||
import javax.swing.*;
|
||||
import java.io.*;
|
||||
import java.math.BigInteger;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -78,8 +72,8 @@ public abstract class BaseTest {
|
|||
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
|
||||
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
|
||||
|
||||
public static final String networkTempDir = "/broad/shptmp/" + System.getProperty("user.name") + "/";
|
||||
public static final File networkTempDirFile = new File(networkTempDir);
|
||||
public static final String networkTempDir;
|
||||
public static final File networkTempDirFile;
|
||||
|
||||
public static final File testDirFile = new File("public/testdata/");
|
||||
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||
|
|
@ -99,6 +93,10 @@ public abstract class BaseTest {
|
|||
// Set the Root logger to only output warnings.
|
||||
logger.setLevel(Level.WARN);
|
||||
|
||||
networkTempDirFile = IOUtils.tempDir("temp.", ".dir", new File("/broad/shptmp/" + System.getProperty("user.name")));
|
||||
networkTempDirFile.deleteOnExit();
|
||||
networkTempDir = networkTempDirFile.getAbsolutePath() + "/";
|
||||
|
||||
// find our file sources
|
||||
// if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) {
|
||||
// logger.fatal("We can't locate the reference directories. Aborting!");
|
||||
|
|
@ -233,18 +231,12 @@ public abstract class BaseTest {
|
|||
|
||||
/**
|
||||
* Creates a temp file that will be deleted on exit after tests are complete.
|
||||
* @param name Prefix of the file.
|
||||
* @param extension Extension to concat to the end of the file.
|
||||
* @return A file in the network temporary directory starting with name, ending with extension, which will be deleted after the program exits.
|
||||
* @param name Name of the file.
|
||||
* @return A file in the network temporary directory with name, which will be deleted after the program exits.
|
||||
*/
|
||||
public static File createNetworkTempFile(String name, String extension) {
|
||||
try {
|
||||
FileUtils.forceMkdir(networkTempDirFile);
|
||||
File file = File.createTempFile(name, extension, networkTempDirFile);
|
||||
file.deleteOnExit();
|
||||
return file;
|
||||
} catch (IOException ex) {
|
||||
throw new ReviewedStingException("Cannot create temp file: " + ex.getMessage(), ex);
|
||||
}
|
||||
public static File createNetworkTempFile(String name) {
|
||||
File file = new File(networkTempDirFile, name);
|
||||
file.deleteOnExit();
|
||||
return file;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testTDTAnnotation() {
|
||||
final String MD5 = "9fe37b61aab695ad47ce3c587148e91f";
|
||||
final String MD5 = "204e67536a17af7eaa6bf0a910818997";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
|
||||
|
|
|
|||
|
|
@ -97,7 +97,12 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
|
||||
int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]);
|
||||
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
|
||||
|
||||
if ( result.log10AlleleFrequencyPosteriors[0][0] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) {
|
||||
Assert.assertTrue(calculatedAlleleCount == expectedAlleleCount || result.log10AlleleFrequencyPosteriors[0][calculatedAlleleCount] < result.log10PosteriorOfAFzero);
|
||||
} else {
|
||||
Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" );
|
||||
e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" );
|
||||
e.put( "--computeSLOD", "6172d2f3d370132f4c57a26aa94c256e" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "274bd9d1b9c7857690fa5f0228ffc6d7" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "594c6d3c48bbc73289de7727d768644d" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "d971d392956aea69c3707da64d24485b" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "21993e71ca1a06a0d1f11d58e3cc26c3" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead"));
|
||||
Arrays.asList("8bd5028cf294850b8a95b3c0af23d728"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51"));
|
||||
Arrays.asList("814dcd66950635a870602d90a1618cce"));
|
||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleRecordsAtOnePosition() {
|
||||
String testFile = validationDataLocation + "selectVariants.onePosition.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER",
|
||||
1,
|
||||
Arrays.asList("20b52c96f5c48258494d072752b53693")
|
||||
);
|
||||
|
||||
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParallelization() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ public class JnaSessionIntegrationTest extends BaseTest {
|
|||
return;
|
||||
}
|
||||
|
||||
File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out");
|
||||
File outFile = createNetworkTempFile("JnaSessionIntegrationTest.out");
|
||||
Session session = factory.getSession();
|
||||
session.init(null);
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public class LibDrmaaIntegrationTest extends BaseTest {
|
|||
|
||||
@Test(dependsOnMethods = { "testDrmaa" })
|
||||
public void testSubmitEcho() throws Exception {
|
||||
if (implementation.indexOf("LSF") >= 0) {
|
||||
if (implementation.contains("LSF")) {
|
||||
System.err.println(" *********************************************************");
|
||||
System.err.println(" ***********************************************************");
|
||||
System.err.println(" **** ****");
|
||||
|
|
@ -101,7 +101,7 @@ public class LibDrmaaIntegrationTest extends BaseTest {
|
|||
Memory error = new Memory(LibDrmaa.DRMAA_ERROR_STRING_BUFFER);
|
||||
int errnum;
|
||||
|
||||
File outFile = createNetworkTempFile("LibDrmaaIntegrationTest-", ".out");
|
||||
File outFile = createNetworkTempFile("LibDrmaaIntegrationTest.out");
|
||||
|
||||
errnum = LibDrmaa.drmaa_init(null, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN);
|
||||
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class LibBatIntegrationTest extends BaseTest {
|
|||
@Test
|
||||
public void testSubmitEcho() throws Exception {
|
||||
String queue = "hour";
|
||||
File outFile = createNetworkTempFile("LibBatIntegrationTest-", ".out");
|
||||
File outFile = createNetworkTempFile("LibBatIntegrationTest.out");
|
||||
|
||||
submit req = new submit();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
package org.broadinstitute.sting.utils.clipping;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
|
|
@ -8,7 +8,9 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Stack;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -17,7 +19,7 @@ import java.util.*;
|
|||
* Time: 6:45 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ClipReadsTestUtils {
|
||||
public class ReadClipperTestUtils {
|
||||
//Should contain all the utils needed for tests to mass produce
|
||||
//reads, cigars, and other needed classes
|
||||
|
||||
|
|
@ -176,4 +178,16 @@ public class ClipReadsTestUtils {
|
|||
Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
|
||||
}
|
||||
|
||||
public static Cigar invertCigar (Cigar cigar) {
|
||||
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
|
||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||
cigarStack.push(cigarElement);
|
||||
|
||||
Cigar invertedCigar = new Cigar();
|
||||
while (!cigarStack.isEmpty())
|
||||
invertedCigar.add(cigarStack.pop());
|
||||
|
||||
return invertedCigar;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.clipreads;
|
||||
package org.broadinstitute.sting.utils.clipping;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
|
|
@ -32,75 +32,119 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 9/28/11
|
||||
* Time: 9:54 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadClipperUnitTest extends BaseTest {
|
||||
|
||||
// TODO: exception testing, make cases that should fail will fail
|
||||
|
||||
// TODO: add indels to all test cases
|
||||
|
||||
List<Cigar> cigarList;
|
||||
int maximumCigarSize = 10;
|
||||
int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize);
|
||||
cigarList = ReadClipperTestUtils.generateCigarList(maximumCigarSize);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipBothEndsByReferenceCoordinates() {
|
||||
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
|
||||
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
int readLength = alnStart - alnEnd;
|
||||
for (int i=0; i<readLength/2; i++) {
|
||||
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(alnStart + i, alnEnd - i);
|
||||
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReadCoordinates() {
|
||||
logger.warn("Executing testHardClipByReadCoordinates");
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int readLength = read.getReadLength();
|
||||
for (int i=0; i<readLength; i++) {
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReadCoordinates(0, i);
|
||||
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
|
||||
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1);
|
||||
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReferenceCoordinates() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
||||
if (!clipLeft.isEmpty())
|
||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||
}
|
||||
|
||||
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReferenceCoordinatesLeftTail() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
|
||||
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
||||
if (!clipLeft.isEmpty())
|
||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReferenceCoordinatesRightTail() {
|
||||
init();
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
|
||||
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipLowQualEnds() {
|
||||
logger.warn("Executing testHardClipLowQualEnds");
|
||||
|
||||
final byte LOW_QUAL = 2;
|
||||
final byte HIGH_QUAL = 30;
|
||||
|
||||
// create a read for every cigar permutation
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
int readLength = read.getReadLength();
|
||||
byte [] quals = new byte[readLength];
|
||||
|
||||
|
|
@ -116,7 +160,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
// Tests
|
||||
|
||||
// Make sure the low qualities are gone
|
||||
testNoLowQualBases(clipLeft, LOW_QUAL);
|
||||
assertNoLowQualBases(clipLeft, LOW_QUAL);
|
||||
|
||||
// Can't run this test with the current contract of no hanging insertions
|
||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||
|
|
@ -131,7 +175,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
// Tests
|
||||
|
||||
// Make sure the low qualities are gone
|
||||
testNoLowQualBases(clipRight, LOW_QUAL);
|
||||
assertNoLowQualBases(clipRight, LOW_QUAL);
|
||||
|
||||
// Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
|
||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
||||
|
|
@ -149,7 +193,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
// Tests
|
||||
|
||||
// Make sure the low qualities are gone
|
||||
testNoLowQualBases(clipBoth, LOW_QUAL);
|
||||
assertNoLowQualBases(clipBoth, LOW_QUAL);
|
||||
|
||||
// Can't run this test with the current contract of no hanging insertions
|
||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
||||
|
|
@ -158,11 +202,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
// logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// ONE OFF Testing clipping that ends inside an insertion
|
||||
// ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug )
|
||||
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
|
||||
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
|
||||
final String CIGAR = "1S1M5I1S";
|
||||
|
|
@ -176,15 +216,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
|
||||
|
||||
ReadClipper lowQualClipper = new ReadClipper(read);
|
||||
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
|
||||
}
|
||||
|
||||
private void testNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||
if (!read.isEmpty()) {
|
||||
byte [] quals = read.getBaseQualities();
|
||||
for (int i=0; i<quals.length; i++)
|
||||
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
|
||||
}
|
||||
ReadClipperTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -192,7 +224,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
// Generate a list of cigars to test
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
ReadClipper readClipper = new ReadClipper(read);
|
||||
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
|
||||
|
||||
|
|
@ -239,4 +271,56 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
// logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testHardClipLeadingInsertions() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
if (startsWithInsertion(cigar)) {
|
||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
|
||||
|
||||
int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar());
|
||||
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
||||
expectedLength -= leadingInsertionLength(ReadClipperTestUtils.invertCigar(read.getCigar()));
|
||||
|
||||
if (! clippedRead.isEmpty()) {
|
||||
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
||||
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
|
||||
}
|
||||
else
|
||||
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||
if (!read.isEmpty()) {
|
||||
byte [] quals = read.getBaseQualities();
|
||||
for (int i=0; i<quals.length; i++)
|
||||
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
|
||||
}
|
||||
}
|
||||
|
||||
private boolean startsWithInsertion(Cigar cigar) {
|
||||
return leadingInsertionLength(cigar) > 0;
|
||||
}
|
||||
|
||||
private int leadingInsertionLength(Cigar cigar) {
|
||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
return cigarElement.getLength();
|
||||
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) {
|
||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 11/27/11
|
||||
* Time: 5:17 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ClippingOpUnitTest extends BaseTest {
|
||||
|
||||
ClippingOp clippingOp;
|
||||
GATKSAMRecord read;
|
||||
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
read = ClipReadsTestUtils.makeRead();
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testHardClip() {
|
||||
// List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
|
||||
// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
|
||||
// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
|
||||
// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
|
||||
// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
|
||||
//
|
||||
// for (TestParameter p : testList) {
|
||||
// init();
|
||||
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
|
||||
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
|
||||
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
|
||||
// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
// p.cigar);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testSoftClip() {
|
||||
// List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
|
||||
// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
|
||||
// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
|
||||
// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
|
||||
// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
|
||||
// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
|
||||
//
|
||||
// for (TestParameter p : testList) {
|
||||
// init();
|
||||
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
|
||||
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
|
||||
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
|
||||
// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
|
||||
// }
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -19,7 +19,7 @@ class ExampleCountLoci extends QScript {
|
|||
@Output
|
||||
var out: File = _
|
||||
|
||||
def script = {
|
||||
def script() {
|
||||
val countLoci = new CountLoci
|
||||
countLoci.reference_sequence = referenceFile
|
||||
countLoci.input_file = bamFiles
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class ExampleCountReads extends QScript {
|
|||
/**
|
||||
* In script, you create and then add() functions to the pipeline.
|
||||
*/
|
||||
def script = {
|
||||
def script() {
|
||||
|
||||
// Run CountReads for all bams jointly.
|
||||
|
||||
|
|
@ -41,6 +41,9 @@ class ExampleCountReads extends QScript {
|
|||
// matches the full form of the argument, but will actually be a scala List[]
|
||||
jointCountReads.input_file = bamFiles
|
||||
|
||||
// Set the memory limit. Also acts as a memory request on LSF and GridEngine.
|
||||
jointCountReads.memoryLimit = 1
|
||||
|
||||
// Add the newly created function to the pipeline.
|
||||
add(jointCountReads)
|
||||
|
||||
|
|
@ -51,6 +54,7 @@ class ExampleCountReads extends QScript {
|
|||
singleCountReads.reference_sequence = referenceFile
|
||||
// ':+' is the scala List append operator
|
||||
singleCountReads.input_file :+= bamFile
|
||||
singleCountReads.memoryLimit = 1
|
||||
add(singleCountReads)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class ExampleCustomWalker extends QScript {
|
|||
/**
|
||||
* In script, you create and then add() functions to the pipeline.
|
||||
*/
|
||||
def script = {
|
||||
def script() {
|
||||
val customWalker = new CommandLineGATK {
|
||||
// Set the name of your walker, for example this will be passed as -T MyCustomWalker
|
||||
this.analysis_type = "MyCustomWalker"
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ class ExampleUnifiedGenotyper extends QScript {
|
|||
@Argument(doc="An optional list of filter expressions.", shortName="filterExpression", required=false)
|
||||
var filterExpressions: List[String] = Nil
|
||||
|
||||
|
||||
// This trait allows us set the variables below in one place,
|
||||
// and then reuse this trait on each CommandLineGATK function below.
|
||||
trait UnifiedGenotyperArguments extends CommandLineGATK {
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
private var shuttingDown = false
|
||||
|
||||
private lazy val pluginManager = {
|
||||
qScriptClasses = IOUtils.tempDir("Q-Classes", "", settings.qSettings.tempDirectory)
|
||||
qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
|
||||
qScriptManager.loadScripts(scripts, qScriptClasses)
|
||||
new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -81,6 +81,10 @@ trait QJobReport extends Logging {
|
|||
this.reportFeatures = features.mapValues(_.toString)
|
||||
}
|
||||
|
||||
def addJobReportBinding(key: String, value: Any) {
|
||||
this.reportFeatures += (key -> value.toString)
|
||||
}
|
||||
|
||||
// copy the QJobReport information -- todo : what's the best way to do this?
|
||||
override def copySettingsTo(function: QFunction) {
|
||||
self.copySettingsTo(function)
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.BaseTest
|
|||
|
||||
class ExampleCountLociPipelineTest {
|
||||
@Test
|
||||
def testCountLoci {
|
||||
def testCountLoci() {
|
||||
val testOut = "count.out"
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "countloci"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.pipeline.examples
|
||||
|
||||
import org.testng.annotations.Test
|
||||
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleCountReadsPipelineTest {
|
||||
@Test
|
||||
def testCountReads() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "countreads"
|
||||
spec.args = Array(
|
||||
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala",
|
||||
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||
" -I " + BaseTest.testDir + "exampleBAM.bam").mkString
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.pipeline.examples
|
||||
|
||||
import org.testng.annotations.Test
|
||||
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleUnifiedGenotyperPipelineTest {
|
||||
@Test
|
||||
def testUnifiedGenotyper() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "unifiedgenotyper"
|
||||
spec.args = Array(
|
||||
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
|
||||
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||
" -I " + BaseTest.testDir + "exampleBAM.bam",
|
||||
" -filter QD",
|
||||
" -filterExpression 'QD < 2.0'").mkString
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue