Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
d07f3d4a5a
32
build.xml
32
build.xml
|
|
@ -327,14 +327,18 @@
|
|||
|
||||
|
||||
<!-- INIT OVERRIDES: call these targets BEFORE init to override build defaults -->
|
||||
<target name="init.publiconly">
|
||||
<target name="init.build.publiconly">
|
||||
<property name="build.target" value="public" />
|
||||
</target>
|
||||
|
||||
<target name="init.publicprotectedonly">
|
||||
<target name="init.build.publicprotectedonly">
|
||||
<property name="build.target" value="protected" />
|
||||
</target>
|
||||
|
||||
<target name="init.build.all">
|
||||
<property name="build.target" value="all" />
|
||||
</target>
|
||||
|
||||
<target name="init.javaonly">
|
||||
<property name="compile.scala" value="false" />
|
||||
</target>
|
||||
|
|
@ -842,19 +846,23 @@
|
|||
<!-- Release-related tasks -->
|
||||
<!-- ******************************************************************************** -->
|
||||
|
||||
<target name="init.buildgatkfull" depends="init.publicprotectedonly, init.javaonly">
|
||||
<target name="init.executable.gatkfull" depends="init.build.publicprotectedonly, init.javaonly">
|
||||
<property name="executable" value="GenomeAnalysisTK" />
|
||||
</target>
|
||||
|
||||
<target name="init.buildgatklite" depends="init.publiconly, init.javaonly">
|
||||
<target name="init.executable.gatklite" depends="init.build.publiconly, init.javaonly">
|
||||
<property name="executable" value="GenomeAnalysisTKLite" />
|
||||
</target>
|
||||
|
||||
<target name="init.buildqueuefull" depends="init.publicprotectedonly, init.javaandscala">
|
||||
<target name="init.executable.queueall" depends="init.build.all, init.javaandscala">
|
||||
<property name="executable" value="Queue" />
|
||||
</target>
|
||||
|
||||
<target name="init.buildqueuelite" depends="init.publiconly, init.javaandscala">
|
||||
<target name="init.executable.queuefull" depends="init.build.publicprotectedonly, init.javaandscala">
|
||||
<property name="executable" value="Queue" />
|
||||
</target>
|
||||
|
||||
<target name="init.executable.queuelite" depends="init.build.publiconly, init.javaandscala">
|
||||
<property name="executable" value="QueueLite" />
|
||||
</target>
|
||||
|
||||
|
|
@ -906,13 +914,15 @@
|
|||
</target>
|
||||
|
||||
<!-- Package specific versions of the GATK/Queue. ALWAYS do an ant clean before invoking these! -->
|
||||
<target name="package.gatk.full" depends="init.buildgatkfull,package" />
|
||||
<target name="package.gatk.full" depends="init.executable.gatkfull,package" />
|
||||
|
||||
<target name="package.gatk.lite" depends="init.buildgatklite,package" />
|
||||
<target name="package.gatk.lite" depends="init.executable.gatklite,package" />
|
||||
|
||||
<target name="package.queue.full" depends="init.buildqueuefull,package" />
|
||||
<target name="package.queue.all" depends="init.executable.queueall,package" />
|
||||
|
||||
<target name="package.queue.lite" depends="init.buildqueuelite,package" />
|
||||
<target name="package.queue.full" depends="init.executable.queuefull,package" />
|
||||
|
||||
<target name="package.queue.lite" depends="init.executable.queuelite,package" />
|
||||
|
||||
|
||||
<!-- Release a build. Don't call this target directly. Call one of the specific release targets below -->
|
||||
|
|
@ -975,6 +985,8 @@
|
|||
|
||||
<target name="mvninstall.gatk.lite" depends="package.gatk.lite,mvninstall" />
|
||||
|
||||
<target name="mvninstall.queue.all" depends="package.queue.all,mvninstall" />
|
||||
|
||||
<target name="mvninstall.queue.full" depends="package.queue.full,mvninstall" />
|
||||
|
||||
<target name="mvninstall.queue.lite" depends="package.queue.lite,mvninstall" />
|
||||
|
|
|
|||
|
|
@ -0,0 +1,59 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocComparator;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* A stash of regions that must be kept uncompressed in all samples
|
||||
*
|
||||
* In general, these are regions that were kept uncompressed by a tumor sample and we want to force
|
||||
* all other samples (normals and/or tumors) to also keep these regions uncompressed
|
||||
*
|
||||
* User: carneiro
|
||||
* Date: 10/15/12
|
||||
* Time: 4:08 PM
|
||||
*/
|
||||
public class CompressionStash extends TreeSet<SimpleGenomeLoc> {
|
||||
public CompressionStash() {
|
||||
super(new GenomeLocComparator());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a SimpleGenomeLoc to the stash and merges it with any overlapping (and contiguous) existing loc
|
||||
* in the stash.
|
||||
*
|
||||
* @param insertLoc the new loc to be inserted
|
||||
* @return true if the loc, or it's merged version, wasn't present in the list before.
|
||||
*/
|
||||
@Override
|
||||
public boolean add(SimpleGenomeLoc insertLoc) {
|
||||
TreeSet<SimpleGenomeLoc> removedLocs = new TreeSet<SimpleGenomeLoc>();
|
||||
for (SimpleGenomeLoc existingLoc : this) {
|
||||
if (existingLoc.isPast(insertLoc)) {
|
||||
break; // if we're past the loc we're done looking for overlaps.
|
||||
}
|
||||
if (existingLoc.equals(insertLoc)) {
|
||||
return false; // if this loc was already present in the stash, we don't need to insert it.
|
||||
}
|
||||
if (existingLoc.contiguousP(insertLoc)) {
|
||||
removedLocs.add(existingLoc); // list the original loc for merging
|
||||
}
|
||||
}
|
||||
for (SimpleGenomeLoc loc : removedLocs) {
|
||||
this.remove(loc); // remove all locs that will be merged
|
||||
}
|
||||
removedLocs.add(insertLoc); // add the new loc to the list of locs that will be merged
|
||||
return super.add(SimpleGenomeLoc.merge(removedLocs)); // merge them all into one loc and add to the stash
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean addAll(Collection<? extends SimpleGenomeLoc> locs) {
|
||||
boolean result = false;
|
||||
for (SimpleGenomeLoc loc : locs) {
|
||||
result |= this.add(loc);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -3,13 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.SortedSet;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/*
|
||||
|
|
@ -41,7 +42,7 @@ import java.util.TreeSet;
|
|||
*
|
||||
* @author depristo
|
||||
*/
|
||||
public class MultiSampleCompressor implements Compressor {
|
||||
public class MultiSampleCompressor {
|
||||
protected static final Logger logger = Logger.getLogger(MultiSampleCompressor.class);
|
||||
|
||||
protected Map<String, SingleSampleCompressor> compressorsPerSample = new HashMap<String, SingleSampleCompressor>();
|
||||
|
|
@ -63,21 +64,36 @@ public class MultiSampleCompressor implements Compressor {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
|
||||
String sample = read.getReadGroup().getSample();
|
||||
SingleSampleCompressor compressor = compressorsPerSample.get(sample);
|
||||
public Set<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
|
||||
String sampleName = read.getReadGroup().getSample();
|
||||
SingleSampleCompressor compressor = compressorsPerSample.get(sampleName);
|
||||
if ( compressor == null )
|
||||
throw new ReviewedStingException("No compressor for sample " + sample);
|
||||
return compressor.addAlignment(read);
|
||||
throw new ReviewedStingException("No compressor for sample " + sampleName);
|
||||
Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = compressor.addAlignment(read);
|
||||
Set<GATKSAMRecord> reads = readsAndStash.getFirst();
|
||||
CompressionStash regions = readsAndStash.getSecond();
|
||||
|
||||
reads.addAll(closeVariantRegionsInAllSamples(regions));
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<GATKSAMRecord> close() {
|
||||
SortedSet<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
for ( SingleSampleCompressor comp : compressorsPerSample.values() )
|
||||
for ( GATKSAMRecord read : comp.close() )
|
||||
reads.add(read);
|
||||
public Set<GATKSAMRecord> close() {
|
||||
Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
for ( SingleSampleCompressor sample : compressorsPerSample.values() ) {
|
||||
Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = sample.close();
|
||||
reads = readsAndStash.getFirst();
|
||||
}
|
||||
return reads;
|
||||
}
|
||||
|
||||
private Set<GATKSAMRecord> closeVariantRegionsInAllSamples(CompressionStash regions) {
|
||||
Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
if (!regions.isEmpty()) {
|
||||
for (SingleSampleCompressor sample : compressorsPerSample.values()) {
|
||||
reads.addAll(sample.closeVariantRegions(regions));
|
||||
}
|
||||
}
|
||||
return reads;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
|
|
@ -45,6 +48,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.BySampleSAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
|
|
@ -86,7 +90,8 @@ import java.util.*;
|
|||
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
|
||||
|
||||
@Output
|
||||
private StingSAMFileWriter out;
|
||||
private StingSAMFileWriter out = null;
|
||||
private SAMFileWriter writerToUse = null;
|
||||
|
||||
/**
|
||||
* The number of bases to keep around mismatches (potential variation)
|
||||
|
|
@ -196,6 +201,10 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
@Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false)
|
||||
private int nContigs = 2;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "nwayout", shortName = "nw", doc = "", required = false)
|
||||
private boolean nwayout = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "", shortName = "dl", doc = "", required = false)
|
||||
private int debugLevel = 0;
|
||||
|
|
@ -222,9 +231,12 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
HashMap<String, Long> readNameHash; // This hash will keep the name of the original read the new compressed name (a number).
|
||||
Long nextReadNumber = 1L; // The next number to use for the compressed read name.
|
||||
|
||||
CompressionStash compressionStash = new CompressionStash();
|
||||
|
||||
SortedSet<GenomeLoc> intervalList;
|
||||
|
||||
private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag
|
||||
private static final String PROGRAM_FILENAME_EXTENSION = ".reduced.bam";
|
||||
|
||||
/**
|
||||
* Basic generic initialization of the readNameHash and the intervalList. Output initialization
|
||||
|
|
@ -240,10 +252,23 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
if (toolkit.getIntervals() != null)
|
||||
intervalList.addAll(toolkit.getIntervals());
|
||||
|
||||
if (!NO_PG_TAG)
|
||||
Utils.setupWriter(out, toolkit, false, true, this, PROGRAM_RECORD_NAME);
|
||||
else
|
||||
|
||||
// todo -- rework the whole NO_PG_TAG thing
|
||||
final boolean preSorted = true;
|
||||
final boolean indexOnTheFly = true;
|
||||
final boolean keep_records = true;
|
||||
final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
|
||||
if (nwayout) {
|
||||
SAMProgramRecord programRecord = NO_PG_TAG ? null : Utils.createProgramRecord(toolkit, this, PROGRAM_RECORD_NAME);
|
||||
writerToUse = new BySampleSAMFileWriter(toolkit, PROGRAM_FILENAME_EXTENSION, sortOrder, preSorted, indexOnTheFly, NO_PG_TAG, programRecord, true);
|
||||
}
|
||||
else {
|
||||
writerToUse = out;
|
||||
out.setPresorted(false);
|
||||
if (!NO_PG_TAG) {
|
||||
Utils.setupWriter(out, toolkit, toolkit.getSAMFileHeader(), !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -384,6 +409,9 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
// output any remaining reads in the compressor
|
||||
for (GATKSAMRecord read : stash.close())
|
||||
outputRead(read);
|
||||
|
||||
if (nwayout)
|
||||
writerToUse.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -552,7 +580,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
|
|||
if (!DONT_COMPRESS_READ_NAMES)
|
||||
compressReadName(read);
|
||||
|
||||
out.addAlignment(read);
|
||||
writerToUse.addAlignment(read);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
|
|
@ -10,7 +12,7 @@ import java.util.TreeSet;
|
|||
* @author carneiro, depristo
|
||||
* @version 3.0
|
||||
*/
|
||||
public class SingleSampleCompressor implements Compressor {
|
||||
public class SingleSampleCompressor {
|
||||
final private int contextSize;
|
||||
final private int downsampleCoverage;
|
||||
final private int minMappingQuality;
|
||||
|
|
@ -24,6 +26,7 @@ public class SingleSampleCompressor implements Compressor {
|
|||
private SlidingWindow slidingWindow;
|
||||
private int slidingWindowCounter;
|
||||
|
||||
public static Pair<Set<GATKSAMRecord>, CompressionStash> emptyPair = new Pair<Set<GATKSAMRecord>,CompressionStash>(new TreeSet<GATKSAMRecord>(), new CompressionStash());
|
||||
|
||||
public SingleSampleCompressor(final int contextSize,
|
||||
final int downsampleCoverage,
|
||||
|
|
@ -46,12 +49,9 @@ public class SingleSampleCompressor implements Compressor {
|
|||
this.allowPolyploidReduction = allowPolyploidReduction;
|
||||
}
|
||||
|
||||
/**
|
||||
* @{inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Iterable<GATKSAMRecord> addAlignment( GATKSAMRecord read ) {
|
||||
TreeSet<GATKSAMRecord> result = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
public Pair<Set<GATKSAMRecord>, CompressionStash> addAlignment( GATKSAMRecord read ) {
|
||||
Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
CompressionStash stash = new CompressionStash();
|
||||
int readOriginalStart = read.getUnclippedStart();
|
||||
|
||||
// create a new window if:
|
||||
|
|
@ -60,7 +60,9 @@ public class SingleSampleCompressor implements Compressor {
|
|||
(readOriginalStart - contextSize > slidingWindow.getStopLocation()))) { // this read is too far away from the end of the current sliding window
|
||||
|
||||
// close the current sliding window
|
||||
result.addAll(slidingWindow.close());
|
||||
Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = slidingWindow.close();
|
||||
reads = readsAndStash.getFirst();
|
||||
stash = readsAndStash.getSecond();
|
||||
slidingWindow = null; // so we create a new one on the next if
|
||||
}
|
||||
|
||||
|
|
@ -69,13 +71,16 @@ public class SingleSampleCompressor implements Compressor {
|
|||
slidingWindowCounter++;
|
||||
}
|
||||
|
||||
result.addAll(slidingWindow.addRead(read));
|
||||
return result;
|
||||
stash.addAll(slidingWindow.addRead(read));
|
||||
return new Pair<Set<GATKSAMRecord>, CompressionStash>(reads, stash);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterable<GATKSAMRecord> close() {
|
||||
return (slidingWindow != null) ? slidingWindow.close() : new TreeSet<GATKSAMRecord>();
|
||||
public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
|
||||
return (slidingWindow != null) ? slidingWindow.close() : emptyPair;
|
||||
}
|
||||
|
||||
public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
|
||||
return slidingWindow.closeVariantRegions(regions);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -57,6 +58,8 @@ public class SlidingWindow {
|
|||
|
||||
private boolean allowPolyploidReductionInGeneral;
|
||||
|
||||
private static CompressionStash emptyRegions = new CompressionStash();
|
||||
|
||||
/**
|
||||
* The types of synthetic reads to use in the finalizeAndAdd method
|
||||
*/
|
||||
|
|
@ -137,7 +140,7 @@ public class SlidingWindow {
|
|||
* @param read the read
|
||||
* @return a list of reads that have been finished by sliding the window.
|
||||
*/
|
||||
public List<GATKSAMRecord> addRead(GATKSAMRecord read) {
|
||||
public CompressionStash addRead(GATKSAMRecord read) {
|
||||
addToHeader(windowHeader, read); // update the window header counts
|
||||
readsInWindow.add(read); // add read to sliding reads
|
||||
return slideWindow(read.getUnclippedStart());
|
||||
|
|
@ -151,8 +154,9 @@ public class SlidingWindow {
|
|||
* @param variantSite boolean array with true marking variant regions
|
||||
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
|
||||
*/
|
||||
private Pair<Integer, Integer> getNextVariantRegion(int from, int to, boolean[] variantSite) {
|
||||
private SimpleGenomeLoc findNextVariantRegion(int from, int to, boolean[] variantSite, boolean forceClose) {
|
||||
boolean foundStart = false;
|
||||
final int windowHeaderStart = getStartLocation(windowHeader);
|
||||
int variantRegionStartIndex = 0;
|
||||
for (int i=from; i<to; i++) {
|
||||
if (variantSite[i] && !foundStart) {
|
||||
|
|
@ -160,10 +164,12 @@ public class SlidingWindow {
|
|||
foundStart = true;
|
||||
}
|
||||
else if(!variantSite[i] && foundStart) {
|
||||
return(new Pair<Integer, Integer>(variantRegionStartIndex, i-1));
|
||||
return(new SimpleGenomeLoc(contig, contigIndex, windowHeaderStart + variantRegionStartIndex, windowHeaderStart + i - 1, true));
|
||||
}
|
||||
}
|
||||
return (foundStart) ? new Pair<Integer, Integer>(variantRegionStartIndex, -1) : null;
|
||||
final int refStart = windowHeaderStart + variantRegionStartIndex;
|
||||
final int refStop = windowHeaderStart + to - 1;
|
||||
return (foundStart && forceClose) ? new SimpleGenomeLoc(contig, contigIndex, refStart, refStop, true) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -172,25 +178,25 @@ public class SlidingWindow {
|
|||
* @param from beginning window header index of the search window (inclusive)
|
||||
* @param to end window header index of the search window (exclusive)
|
||||
* @param variantSite boolean array with true marking variant regions
|
||||
* @return a list with start/stops of variant regions following getNextVariantRegion description
|
||||
* @return a list with start/stops of variant regions following findNextVariantRegion description
|
||||
*/
|
||||
private List<Pair<Integer, Integer>> getAllVariantRegions(int from, int to, boolean[] variantSite) {
|
||||
List<Pair<Integer,Integer>> regions = new LinkedList<Pair<Integer, Integer>>();
|
||||
private CompressionStash findVariantRegions(int from, int to, boolean[] variantSite, boolean forceClose) {
|
||||
CompressionStash regions = new CompressionStash();
|
||||
int index = from;
|
||||
while(index < to) {
|
||||
Pair<Integer,Integer> result = getNextVariantRegion(index, to, variantSite);
|
||||
SimpleGenomeLoc result = findNextVariantRegion(index, to, variantSite, forceClose);
|
||||
if (result == null)
|
||||
break;
|
||||
|
||||
regions.add(result);
|
||||
if (result.getSecond() < 0)
|
||||
if (!result.isFinished())
|
||||
break;
|
||||
index = result.getSecond() + 1;
|
||||
|
||||
index = result.getStop() + 1;
|
||||
}
|
||||
return regions;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Determines if the window can be slid given the new incoming read.
|
||||
*
|
||||
|
|
@ -201,25 +207,25 @@ public class SlidingWindow {
|
|||
* @param incomingReadUnclippedStart the incoming read's start position. Must be the unclipped start!
|
||||
* @return all reads that have fallen to the left of the sliding window after the slide
|
||||
*/
|
||||
protected List<GATKSAMRecord> slideWindow(final int incomingReadUnclippedStart) {
|
||||
List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
protected CompressionStash slideWindow(final int incomingReadUnclippedStart) {
|
||||
final int windowHeaderStartLocation = getStartLocation(windowHeader);
|
||||
CompressionStash regions = emptyRegions;
|
||||
boolean forceClose = true;
|
||||
|
||||
if (incomingReadUnclippedStart - contextSize > windowHeaderStartLocation) {
|
||||
markSites(incomingReadUnclippedStart);
|
||||
int readStartHeaderIndex = incomingReadUnclippedStart - windowHeaderStartLocation;
|
||||
int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0); // this is the limit of what we can close/send to consensus (non-inclusive)
|
||||
|
||||
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet());
|
||||
finalizedReads = closeVariantRegions(regions, false);
|
||||
|
||||
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
|
||||
readsInWindow.pollFirst();
|
||||
}
|
||||
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
|
||||
}
|
||||
|
||||
return finalizedReads;
|
||||
// todo -- can be more aggressive here removing until the NEW window header start location after closing the variant regions
|
||||
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
|
||||
readsInWindow.pollFirst();
|
||||
}
|
||||
|
||||
return regions;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -623,26 +629,27 @@ public class SlidingWindow {
|
|||
result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
|
||||
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||
|
||||
return result; // finalized reads will be downsampled if necessary
|
||||
return result; // finalized reads will be downsampled if necessary
|
||||
}
|
||||
|
||||
|
||||
private List<GATKSAMRecord> closeVariantRegions(List<Pair<Integer, Integer>> regions, boolean forceClose) {
|
||||
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
|
||||
public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
|
||||
TreeSet<GATKSAMRecord> allReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
if (!regions.isEmpty()) {
|
||||
int lastStop = -1;
|
||||
for (Pair<Integer, Integer> region : regions) {
|
||||
int start = region.getFirst();
|
||||
int stop = region.getSecond();
|
||||
if (stop < 0 && forceClose)
|
||||
stop = windowHeader.size() - 1;
|
||||
if (stop >= 0) {
|
||||
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
|
||||
int windowHeaderStart = getStartLocation(windowHeader);
|
||||
|
||||
for (SimpleGenomeLoc region : regions) {
|
||||
if (region.isFinished() && region.getContig() == contig && region.getStart() >= windowHeaderStart && region.getStop() <= windowHeaderStart + windowHeader.size()) {
|
||||
int start = region.getStart() - windowHeaderStart;
|
||||
int stop = region.getStop() - windowHeaderStart;
|
||||
|
||||
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1)); // todo -- add condition here dependent on dbSNP track
|
||||
lastStop = stop;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
|
||||
windowHeader.remove(); // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
|
||||
|
||||
for (int i = 0; i <= lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
|
||||
windowHeader.remove();
|
||||
}
|
||||
return allReads;
|
||||
}
|
||||
|
|
@ -676,23 +683,24 @@ public class SlidingWindow {
|
|||
*
|
||||
* @return All reads generated
|
||||
*/
|
||||
public List<GATKSAMRecord> close() {
|
||||
public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
|
||||
// mark variant regions
|
||||
List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
|
||||
Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
CompressionStash regions = new CompressionStash();
|
||||
boolean forceCloseUnfinishedRegions = true;
|
||||
|
||||
if (!windowHeader.isEmpty()) {
|
||||
markSites(getStopLocation(windowHeader) + 1);
|
||||
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet());
|
||||
finalizedReads = closeVariantRegions(regions, true);
|
||||
regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), forceCloseUnfinishedRegions);
|
||||
finalizedReads = closeVariantRegions(regions);
|
||||
|
||||
if (!windowHeader.isEmpty()) {
|
||||
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), false));
|
||||
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return finalizedReads;
|
||||
return new Pair<Set<GATKSAMRecord>, CompressionStash>(finalizedReads, regions);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -14,6 +14,9 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
final String DIVIDEBYZERO_BAM = validationDataLocation + "ReduceReadsDivideByZeroBug.bam";
|
||||
final String DIVIDEBYZERO_L = " -L " + validationDataLocation + "ReduceReadsDivideByZeroBug.intervals";
|
||||
final String L = " -L 20:10,100,000-10,120,000 ";
|
||||
final String COREDUCTION_BAM_A = validationDataLocation + "coreduction.test.A.bam";
|
||||
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
|
||||
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
|
||||
|
||||
private void RRTest(String testName, String args, String md5) {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s ";
|
||||
|
|
@ -21,36 +24,36 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
executeTest(testName, spec);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testDefaultCompression() {
|
||||
RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
|
||||
RRTest("testDefaultCompression ", L, "98080d3c53f441564796fc143cf510da");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testMultipleIntervals() {
|
||||
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||
RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
|
||||
RRTest("testMultipleIntervals ", intervals, "c5dcdf4edf368b5b897d66f76034d9f0");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testHighCompression() {
|
||||
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
|
||||
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "27cb99e87eda5e46187e56f50dd37f26");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testLowCompression() {
|
||||
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7c9b4a70c2c90b0a995800aa42852e63");
|
||||
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "4e7f111688d49973c35669855b7a2eaf");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testIndelCompression() {
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f6c9ea83608f35f113cf1f62a77ee6d0");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testFilteredDeletionCompression() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("891bd6dcda66611f343e8ff25f34aaeb")));
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("122e4e60c4412a31d0aeb3cce879e841")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -61,20 +64,26 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
*
|
||||
* This bam is simplified to replicate the exact bug with the three provided intervals.
|
||||
*/
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testAddingReadAfterTailingTheStash() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6")));
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("647b0f0f95730de8e6bc4f74186ad4df")));
|
||||
}
|
||||
|
||||
/**
|
||||
* Divide by zero bug reported by GdA and users in the forum. Happens when the downsampler goes over a region where all reads get
|
||||
* filtered out.
|
||||
*/
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testDivideByZero() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("93ffdc209d4cc0fc4f0169ca9be55cc2")));
|
||||
executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("2c87985972dd43ee9dd50b463d93a511")));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReduction() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("5c30fde961a1357bf72c15144c01981b")));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -350,11 +350,11 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return A name for this type of walker.
|
||||
*/
|
||||
@Override
|
||||
public String getName(Class<? extends Walker> walkerType) {
|
||||
public String getName(Class walkerType) {
|
||||
String walkerName = "";
|
||||
|
||||
if (walkerType.getAnnotation(WalkerName.class) != null)
|
||||
walkerName = walkerType.getAnnotation(WalkerName.class).value().trim();
|
||||
walkerName = ((WalkerName)walkerType.getAnnotation(WalkerName.class)).value().trim();
|
||||
else
|
||||
walkerName = super.getName(walkerType);
|
||||
|
||||
|
|
|
|||
|
|
@ -198,8 +198,6 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
|
|||
|
||||
source.close();
|
||||
file.delete(); // this should be last to aid in debugging when the process fails
|
||||
} catch (UserException e) {
|
||||
throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
public enum GATKReportVersion {
|
||||
/**
|
||||
|
|
@ -91,6 +92,6 @@ public enum GATKReportVersion {
|
|||
if (header.startsWith("#:GATKReport.v1.1"))
|
||||
return GATKReportVersion.V1_1;
|
||||
|
||||
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
|
||||
throw new UserException.BadInput("The GATK report has an unknown/unsupported version in the header: " + header);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
|
||||
|
||||
// package access for unit testing
|
||||
ActivityProfile profile;
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
|
|
@ -53,7 +56,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
|
||||
int minStart = Integer.MAX_VALUE;
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||
profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||
|
||||
|
|
|
|||
|
|
@ -277,8 +277,12 @@ public class VariantAnnotatorEngine {
|
|||
if ( expression.fieldName.equals("ID") ) {
|
||||
if ( vc.hasID() )
|
||||
infoAnnotations.put(expression.fullName, vc.getID());
|
||||
} else if (expression.fieldName.equals("ALT")) {
|
||||
infoAnnotations.put(expression.fullName, vc.getAlternateAllele(0).getDisplayString());
|
||||
|
||||
} else if ( vc.hasAttribute(expression.fieldName) ) {
|
||||
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
|
||||
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,73 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.SortedSet;
|
||||
|
||||
/**
|
||||
* GenomeLocs are very useful objects to keep track of genomic locations and perform set operations
|
||||
* with them.
|
||||
*
|
||||
* However, GenomeLocs are bound to strict validation through the GenomeLocParser and cannot
|
||||
* be created easily for small tasks that do not require the rigors of the GenomeLocParser validation
|
||||
*
|
||||
* SimpleGenomeLoc is a simple utility to create GenomeLocs without going through the parser. Should
|
||||
* only be used outside of the engine.
|
||||
*
|
||||
* User: carneiro
|
||||
* Date: 10/16/12
|
||||
* Time: 2:07 PM
|
||||
*/
|
||||
public class SimpleGenomeLoc extends GenomeLoc {
|
||||
private boolean finished;
|
||||
|
||||
public SimpleGenomeLoc(String contigName, int contigIndex, int start, int stop, boolean finished) {
|
||||
super(contigName, contigIndex, start, stop);
|
||||
this.finished = finished;
|
||||
}
|
||||
|
||||
public boolean isFinished() {
|
||||
return finished;
|
||||
}
|
||||
|
||||
@Requires("a != null && b != null")
|
||||
public static SimpleGenomeLoc merge(SimpleGenomeLoc a, SimpleGenomeLoc b) throws ReviewedStingException {
|
||||
if(GenomeLoc.isUnmapped(a) || GenomeLoc.isUnmapped(b)) {
|
||||
throw new ReviewedStingException("Tried to merge unmapped genome locs");
|
||||
}
|
||||
|
||||
if (!(a.contiguousP(b))) {
|
||||
throw new ReviewedStingException("The two genome locs need to be contiguous");
|
||||
}
|
||||
|
||||
|
||||
return new SimpleGenomeLoc(a.getContig(), a.contigIndex,
|
||||
Math.min(a.getStart(), b.getStart()),
|
||||
Math.max(a.getStop(), b.getStop()),
|
||||
a.isFinished());
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges a list of *sorted* *contiguous* locs into one
|
||||
*
|
||||
* @param sortedLocs a sorted list of contiguous locs
|
||||
* @return one merged loc
|
||||
*/
|
||||
public static SimpleGenomeLoc merge(SortedSet<SimpleGenomeLoc> sortedLocs) {
|
||||
SimpleGenomeLoc previousLoc = null;
|
||||
for (SimpleGenomeLoc loc : sortedLocs) {
|
||||
if (loc.isUnmapped()) {
|
||||
throw new ReviewedStingException("Tried to merge unmapped genome locs");
|
||||
}
|
||||
if (previousLoc != null && !previousLoc.contiguousP(loc)) {
|
||||
throw new ReviewedStingException("The genome locs need to be contiguous");
|
||||
}
|
||||
previousLoc = loc;
|
||||
}
|
||||
SimpleGenomeLoc firstLoc = sortedLocs.first();
|
||||
SimpleGenomeLoc lastLoc = sortedLocs.last();
|
||||
return merge(firstLoc, lastLoc);
|
||||
}
|
||||
}
|
||||
|
|
@ -190,6 +190,10 @@ public class UnifiedGenotyperEngine {
|
|||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||
if ( vc != null )
|
||||
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
|
||||
// todo - uncomment if we want to also emit a null ref call (with no QUAL) if there's no evidence for REF and if EMIT_ALL_SITES is set
|
||||
// else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
|
||||
// results.add(generateEmptyContext(tracker, refContext, null, rawContext));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -183,6 +183,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
@Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
|
||||
private boolean keepSitesWithAC0 = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="numSamples", shortName="numSamples", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
|
||||
private int numSamplesFromArgument = 0;
|
||||
|
||||
/**
|
||||
* If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
|
||||
* variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
|
||||
|
|
@ -589,6 +593,14 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
|
||||
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
|
||||
|
||||
public int getNumberOfSamplesForEvaluation() {
|
||||
if (sampleNamesForEvaluation!= null && !sampleNamesForEvaluation.isEmpty())
|
||||
return sampleNamesForEvaluation.size();
|
||||
else {
|
||||
return numSamplesFromArgument;
|
||||
}
|
||||
|
||||
}
|
||||
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
|
||||
|
||||
public List<RodBinding<VariantContext>> getComps() { return comps; }
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class AlleleCount extends VariantStratifier {
|
|||
|
||||
// There are ploidy x n sample chromosomes
|
||||
// TODO -- generalize to handle multiple ploidy
|
||||
nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * getVariantEvalWalker().getSamplePloidy();
|
||||
nchrom = getVariantEvalWalker().getNumberOfSamplesForEvaluation() * getVariantEvalWalker().getSamplePloidy();
|
||||
if ( nchrom < 2 )
|
||||
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");
|
||||
|
||||
|
|
|
|||
|
|
@ -659,7 +659,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
return (g !=null && !g.isHomRef() && (g.isCalled() || (g.isFiltered() && !EXCLUDE_FILTERED)));
|
||||
}
|
||||
|
||||
private boolean haveSameGenotypes(Genotype g1, Genotype g2) {
|
||||
private boolean haveSameGenotypes(final Genotype g1, final Genotype g2) {
|
||||
if ( g1 == null || g2 == null )
|
||||
return false;
|
||||
|
||||
if ((g1.isCalled() && g2.isFiltered()) ||
|
||||
(g2.isCalled() && g1.isFiltered()) ||
|
||||
(g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED))
|
||||
|
|
|
|||
|
|
@ -687,23 +687,69 @@ public class Utils {
|
|||
array[i] = value;
|
||||
}
|
||||
|
||||
public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
|
||||
final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME);
|
||||
|
||||
SAMFileHeader header = toolkit.getSAMFileHeader();
|
||||
/**
|
||||
* Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
|
||||
* up the writer with the header and presorted status.
|
||||
*
|
||||
* @param toolkit the engine
|
||||
* @param originalHeader original header
|
||||
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
|
||||
* @param programRecord the program record for this program
|
||||
*/
|
||||
public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, SAMProgramRecord programRecord) {
|
||||
SAMFileHeader header = originalHeader.clone();
|
||||
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
||||
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
||||
for ( SAMProgramRecord record : oldRecords )
|
||||
if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) || KEEP_ALL_PG_RECORDS )
|
||||
if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
|
||||
newRecords.add(record);
|
||||
|
||||
newRecords.add(programRecord);
|
||||
header.setProgramRecords(newRecords);
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and returns
|
||||
* the new header to be added to the BAM writer.
|
||||
*
|
||||
* @param toolkit the engine
|
||||
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
|
||||
* @param walker the walker object (so we can extract the command line)
|
||||
* @param PROGRAM_RECORD_NAME the name for the PG tag
|
||||
* @return a pre-filled header for the bam writer
|
||||
*/
|
||||
public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
|
||||
final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME);
|
||||
return setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, programRecord);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
|
||||
* up the writer with the header and presorted status.
|
||||
*
|
||||
* @param writer BAM file writer
|
||||
* @param toolkit the engine
|
||||
* @param preSorted whether or not the writer can assume reads are going to be added are already sorted
|
||||
* @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
|
||||
* @param walker the walker object (so we can extract the command line)
|
||||
* @param PROGRAM_RECORD_NAME the name for the PG tag
|
||||
*/
|
||||
public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
|
||||
SAMFileHeader header = setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, walker, PROGRAM_RECORD_NAME);
|
||||
writer.writeHeader(header);
|
||||
writer.setPresorted(preSorted);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Creates a program record (@PG) tag
|
||||
*
|
||||
* @param toolkit the engine
|
||||
* @param walker the walker object (so we can extract the command line)
|
||||
* @param PROGRAM_RECORD_NAME the name for the PG tag
|
||||
* @return a program record for the tool
|
||||
*/
|
||||
public static SAMProgramRecord createProgramRecord(GenomeAnalysisEngine toolkit, Object walker, String PROGRAM_RECORD_NAME) {
|
||||
final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
|
||||
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
|
||||
|
|
@ -858,4 +904,5 @@ public class Utils {
|
|||
}
|
||||
return subLists;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,6 +103,11 @@ public class ActivityProfile {
|
|||
isActiveList.add(result);
|
||||
}
|
||||
|
||||
// for unit testing
|
||||
public List<ActivityProfileResult> getActiveList() {
|
||||
return isActiveList;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return isActiveList.size();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ public class PluginManager<PluginType> {
|
|||
* Create a new plugin manager.
|
||||
* @param pluginType Core type for a plugin.
|
||||
*/
|
||||
public PluginManager(Class<PluginType> pluginType) {
|
||||
public PluginManager(Class pluginType) {
|
||||
this(pluginType, pluginType.getSimpleName().toLowerCase(), pluginType.getSimpleName(), null);
|
||||
}
|
||||
|
||||
|
|
@ -110,7 +110,7 @@ public class PluginManager<PluginType> {
|
|||
* @param pluginType Core type for a plugin.
|
||||
* @param classpath Custom class path to search for classes.
|
||||
*/
|
||||
public PluginManager(Class<PluginType> pluginType, List<URL> classpath) {
|
||||
public PluginManager(Class pluginType, List<URL> classpath) {
|
||||
this(pluginType, pluginType.getSimpleName().toLowerCase(), pluginType.getSimpleName(), classpath);
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class PluginManager<PluginType> {
|
|||
* @param pluginCategory Provides a category name to the plugin. Must not be null.
|
||||
* @param pluginSuffix Provides a suffix that will be trimmed off when converting to a plugin name. Can be null.
|
||||
*/
|
||||
public PluginManager(Class<PluginType> pluginType, String pluginCategory, String pluginSuffix) {
|
||||
public PluginManager(Class pluginType, String pluginCategory, String pluginSuffix) {
|
||||
this(pluginType, pluginCategory, pluginSuffix, null);
|
||||
}
|
||||
|
||||
|
|
@ -131,7 +131,7 @@ public class PluginManager<PluginType> {
|
|||
* @param pluginSuffix Provides a suffix that will be trimmed off when converting to a plugin name. Can be null.
|
||||
* @param classpath Custom class path to search for classes.
|
||||
*/
|
||||
public PluginManager(Class<PluginType> pluginType, String pluginCategory, String pluginSuffix, List<URL> classpath) {
|
||||
public PluginManager(Class pluginType, String pluginCategory, String pluginSuffix, List<URL> classpath) {
|
||||
this.pluginCategory = pluginCategory;
|
||||
this.pluginSuffix = pluginSuffix;
|
||||
|
||||
|
|
@ -149,6 +149,7 @@ public class PluginManager<PluginType> {
|
|||
}
|
||||
|
||||
// Load all classes types filtering them by concrete.
|
||||
@SuppressWarnings("unchecked")
|
||||
Set<Class<? extends PluginType>> allTypes = reflections.getSubTypesOf(pluginType);
|
||||
for( Class<? extends PluginType> type: allTypes ) {
|
||||
// The plugin manager does not support anonymous classes; to be a plugin, a class must have a name.
|
||||
|
|
@ -325,7 +326,7 @@ public class PluginManager<PluginType> {
|
|||
* @param pluginType The type of plugin.
|
||||
* @return A name for this type of plugin.
|
||||
*/
|
||||
public String getName(Class<? extends PluginType> pluginType) {
|
||||
public String getName(Class pluginType) {
|
||||
String pluginName = "";
|
||||
|
||||
if (pluginName.length() == 0) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: Nov 13
|
||||
*/
|
||||
public class BySampleSAMFileWriter extends NWaySAMFileWriter{
|
||||
|
||||
private final Map<String, SAMReaderID> sampleToWriterMap;
|
||||
|
||||
public BySampleSAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
|
||||
super(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, pRecord, keep_records);
|
||||
|
||||
sampleToWriterMap = new HashMap<String, SAMReaderID>(toolkit.getSAMFileHeader().getReadGroups().size() * 2);
|
||||
|
||||
for (SAMReaderID readerID : toolkit.getReadsDataSource().getReaderIDs()) {
|
||||
for (SAMReadGroupRecord rg : toolkit.getReadsDataSource().getHeader(readerID).getReadGroups()) {
|
||||
String sample = rg.getSample();
|
||||
if (sampleToWriterMap.containsKey(sample) && sampleToWriterMap.get(sample) != readerID) {
|
||||
throw new ReviewedStingException("The same sample appears in multiple files, this input cannot be multiplexed using the BySampleSAMFileWriter, try NWaySAMFileWriter instead.");
|
||||
}
|
||||
else {
|
||||
sampleToWriterMap.put(sample, readerID);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addAlignment(SAMRecord samRecord) {
|
||||
super.addAlignment(samRecord, sampleToWriterMap.get(samRecord.getReadGroup().getSample()));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,186 +1,177 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: May 31, 2011
|
||||
* Time: 3:52:49 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NWaySAMFileWriter implements SAMFileWriter {
|
||||
|
||||
private Map<SAMReaderID,SAMFileWriter> writerMap = null;
|
||||
private boolean presorted ;
|
||||
GenomeAnalysisEngine toolkit;
|
||||
boolean KEEP_ALL_PG_RECORDS = false;
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
|
||||
this.presorted = presorted;
|
||||
this.toolkit = toolkit;
|
||||
this.KEEP_ALL_PG_RECORDS = keep_records;
|
||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
|
||||
this.presorted = presorted;
|
||||
this.toolkit = toolkit;
|
||||
this.KEEP_ALL_PG_RECORDS = keep_records;
|
||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
||||
this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
|
||||
}
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
|
||||
this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
|
||||
* from <code>toolkit</code>). The <code>in2out</code> map must contain an entry for each input filename and map it
|
||||
* onto a unique output file name.
|
||||
* @param toolkit
|
||||
* @param in2out
|
||||
*/
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
|
||||
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
|
||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||
|
||||
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
|
||||
|
||||
String outName;
|
||||
if ( ! in2out.containsKey(fName) )
|
||||
throw new UserException.BadInput("Input-output bam filename map does not contain an entry for the input file "+fName);
|
||||
outName = in2out.get(fName);
|
||||
|
||||
if ( writerMap.containsKey( rid ) )
|
||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
|
||||
"map file likely contains multiple entries for this input file");
|
||||
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
|
||||
* from <code>toolkit</code>). The output file names will be generated automatically by stripping ".sam" or ".bam" off the
|
||||
* input file name and adding ext instead (e.g. ".cleaned.bam").
|
||||
* onto a unique output file name.
|
||||
* @param toolkit
|
||||
* @param ext
|
||||
*/
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
|
||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||
|
||||
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
|
||||
|
||||
String outName;
|
||||
int pos ;
|
||||
if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
|
||||
else {
|
||||
if ( fName.toUpperCase().endsWith(".SAM") ) pos = fName.toUpperCase().lastIndexOf(".SAM");
|
||||
else throw new UserException.BadInput("Input file name "+fName+" does not end with .sam or .bam");
|
||||
}
|
||||
String prefix = fName.substring(0,pos);
|
||||
outName = prefix+ext;
|
||||
|
||||
if ( writerMap.containsKey( rid ) )
|
||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
|
||||
boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
|
||||
File f = new File(outName);
|
||||
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
|
||||
header.setSortOrder(order);
|
||||
|
||||
if ( programRecord != null ) {
|
||||
// --->> add program record
|
||||
List<SAMProgramRecord> oldRecords = header.getProgramRecords();
|
||||
List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
|
||||
for ( SAMProgramRecord record : oldRecords ) {
|
||||
if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
|
||||
newRecords.add(record);
|
||||
}
|
||||
newRecords.add(programRecord);
|
||||
header.setProgramRecords(newRecords);
|
||||
// <-- add program record ends here
|
||||
}
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
factory.setCreateIndex(indexOnTheFly);
|
||||
factory.setCreateMd5File(generateMD5);
|
||||
SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
|
||||
writerMap.put(id,sw);
|
||||
}
|
||||
|
||||
public Collection<SAMFileWriter> getWriters() {
|
||||
return writerMap.values();
|
||||
}
|
||||
|
||||
public void addAlignment(SAMRecord samRecord) {
|
||||
final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
|
||||
String rg = samRecord.getStringAttribute("RG");
|
||||
if ( rg != null ) {
|
||||
String rg_orig = toolkit.getReadsDataSource().getOriginalReadGroupId(rg);
|
||||
samRecord.setAttribute("RG",rg_orig);
|
||||
}
|
||||
writerMap.get(id).addAlignment(samRecord);
|
||||
}
|
||||
|
||||
public SAMFileHeader getFileHeader() {
|
||||
return toolkit.getSAMFileHeader();
|
||||
}
|
||||
|
||||
public void close() {
|
||||
for ( SAMFileWriter w : writerMap.values() ) w.close();
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: May 31, 2011
|
||||
* Time: 3:52:49 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NWaySAMFileWriter implements SAMFileWriter {
|
||||
|
||||
private Map<SAMReaderID,SAMFileWriter> writerMap = null;
|
||||
private boolean presorted ;
|
||||
GenomeAnalysisEngine toolkit;
|
||||
boolean KEEP_ALL_PG_RECORDS = false;
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
|
||||
this.presorted = presorted;
|
||||
this.toolkit = toolkit;
|
||||
this.KEEP_ALL_PG_RECORDS = keep_records;
|
||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
|
||||
this.presorted = presorted;
|
||||
this.toolkit = toolkit;
|
||||
this.KEEP_ALL_PG_RECORDS = keep_records;
|
||||
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
|
||||
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
|
||||
this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
|
||||
}
|
||||
|
||||
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
|
||||
this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
|
||||
* from <code>toolkit</code>). The <code>in2out</code> map must contain an entry for each input filename and map it
|
||||
* onto a unique output file name.
|
||||
* @param toolkit
|
||||
* @param in2out
|
||||
*/
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
|
||||
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
|
||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||
|
||||
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
|
||||
|
||||
String outName;
|
||||
if ( ! in2out.containsKey(fName) )
|
||||
throw new UserException.BadInput("Input-output bam filename map does not contain an entry for the input file "+fName);
|
||||
outName = in2out.get(fName);
|
||||
|
||||
if ( writerMap.containsKey( rid ) )
|
||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
|
||||
"map file likely contains multiple entries for this input file");
|
||||
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
|
||||
* from <code>toolkit</code>). The output file names will be generated automatically by stripping ".sam" or ".bam" off the
|
||||
* input file name and adding ext instead (e.g. ".cleaned.bam").
|
||||
* onto a unique output file name.
|
||||
* @param toolkit
|
||||
* @param ext
|
||||
*/
|
||||
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
|
||||
boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
|
||||
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
|
||||
|
||||
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
|
||||
|
||||
String outName;
|
||||
int pos ;
|
||||
if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
|
||||
else {
|
||||
if ( fName.toUpperCase().endsWith(".SAM") ) pos = fName.toUpperCase().lastIndexOf(".SAM");
|
||||
else throw new UserException.BadInput("Input file name "+fName+" does not end with .sam or .bam");
|
||||
}
|
||||
String prefix = fName.substring(0,pos);
|
||||
outName = prefix+ext;
|
||||
|
||||
if ( writerMap.containsKey( rid ) )
|
||||
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
|
||||
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
|
||||
boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
|
||||
File f = new File(outName);
|
||||
SAMFileHeader header = Utils.setupWriter(toolkit, toolkit.getSAMFileHeader(id), KEEP_ALL_PG_RECORDS, programRecord);
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
factory.setCreateIndex(indexOnTheFly);
|
||||
factory.setCreateMd5File(generateMD5);
|
||||
SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
|
||||
writerMap.put(id,sw);
|
||||
}
|
||||
|
||||
public Collection<SAMFileWriter> getWriters() {
|
||||
return writerMap.values();
|
||||
}
|
||||
|
||||
public void addAlignment(SAMRecord samRecord) {
|
||||
final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
|
||||
String rg = samRecord.getStringAttribute("RG");
|
||||
if ( rg != null ) {
|
||||
String rg_orig = toolkit.getReadsDataSource().getOriginalReadGroupId(rg);
|
||||
samRecord.setAttribute("RG",rg_orig);
|
||||
}
|
||||
addAlignment(samRecord, id);
|
||||
}
|
||||
|
||||
public void addAlignment(SAMRecord samRecord, SAMReaderID readerID) {
|
||||
writerMap.get(readerID).addAlignment(samRecord);
|
||||
}
|
||||
|
||||
public SAMFileHeader getFileHeader() {
|
||||
return toolkit.getSAMFileHeader();
|
||||
}
|
||||
|
||||
public void close() {
|
||||
for ( SAMFileWriter w : writerMap.values() ) w.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.testng.Assert;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 11/13/12
|
||||
* Time: 2:47 PM
|
||||
*/
|
||||
public class TraverseActiveRegionsTest extends BaseTest {
|
||||
|
||||
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
private final double prob;
|
||||
|
||||
public DummyActiveRegionWalker() {
|
||||
this.prob = 1.0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return new ActivityProfileResult(ref.getLocus(), prob);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
|
||||
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private ActiveRegionWalker<Integer, Integer> walker;
|
||||
|
||||
@BeforeClass
|
||||
private void init() throws FileNotFoundException {
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||
SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllIntervalsSeen() throws Exception {
|
||||
List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
|
||||
GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
|
||||
intervals.add(interval);
|
||||
|
||||
LocusShardDataProvider dataProvider = createDataProvider(intervals);
|
||||
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
boolean allGenomeLocsSeen = true;
|
||||
for (GenomeLoc loc : intervals) {
|
||||
boolean thisGenomeLocSeen = false;
|
||||
for (ActivityProfileResult active : t.profile.getActiveList()) {
|
||||
if (loc.equals(active.getLoc())) {
|
||||
thisGenomeLocSeen = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!thisGenomeLocSeen) {
|
||||
allGenomeLocsSeen = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
|
||||
}
|
||||
|
||||
private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
|
||||
walker = new DummyActiveRegionWalker();
|
||||
|
||||
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
|
||||
Shard shard = new MockLocusShard(genomeLocParser, intervals);
|
||||
WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
//engine.setReferenceDataSource(reference);
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
t.initialize(engine);
|
||||
|
||||
return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
|
||||
}
|
||||
}
|
||||
|
|
@ -147,13 +147,13 @@ class GATKResourcesBundle extends QScript {
|
|||
//
|
||||
// example call set for wiki tutorial
|
||||
//
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/exampleCalls/NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf",
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/exampleCalls/NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf",
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.raw.subset", b37, true, true))
|
||||
|
||||
//
|
||||
// Test BAM file, specific to each reference
|
||||
//
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam",
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam",
|
||||
"IGNORE", b37, false, false))
|
||||
|
||||
//
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.queue
|
||||
|
||||
import function.QFunction
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.commandline._
|
||||
import org.broadinstitute.sting.queue.util._
|
||||
|
|
@ -93,21 +92,25 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
private lazy val qScriptPluginManager = {
|
||||
qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
|
||||
qScriptManager.loadScripts(scripts, qScriptClasses)
|
||||
new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
|
||||
new PluginManager[QScript](qPluginType, Seq(qScriptClasses.toURI.toURL))
|
||||
}
|
||||
|
||||
private lazy val qStatusMessengerPluginManager = {
|
||||
new PluginManager[QStatusMessenger](classOf[QStatusMessenger])
|
||||
private lazy val qCommandPlugin = {
|
||||
new PluginManager[QCommandPlugin](classOf[QCommandPlugin])
|
||||
}
|
||||
|
||||
ClassFieldCache.parsingEngine = new ParsingEngine(this)
|
||||
private lazy val allCommandPlugins = qCommandPlugin.createAllTypes()
|
||||
|
||||
private lazy val qPluginType: Class[_ <: QScript] = {
|
||||
allCommandPlugins.map(_.qScriptClass).headOption.getOrElse(classOf[QScript])
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes the QScripts passed in, runs their script() methods, retrieves their generated
|
||||
* functions, and then builds and runs a QGraph based on the dependencies.
|
||||
*/
|
||||
def execute = {
|
||||
val allStatusMessengers = qStatusMessengerPluginManager.createAllTypes()
|
||||
ClassFieldCache.parsingEngine = this.parser
|
||||
|
||||
if (settings.qSettings.runName == null)
|
||||
settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
|
||||
|
|
@ -115,18 +118,31 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp")
|
||||
qGraph.initializeWithSettings(settings)
|
||||
|
||||
for (statusMessenger <- allStatusMessengers) {
|
||||
loadArgumentsIntoObject(statusMessenger)
|
||||
for (commandPlugin <- allCommandPlugins) {
|
||||
loadArgumentsIntoObject(commandPlugin)
|
||||
}
|
||||
|
||||
for (statusMessenger <- allStatusMessengers) {
|
||||
statusMessenger.started()
|
||||
for (commandPlugin <- allCommandPlugins) {
|
||||
if (commandPlugin.statusMessenger != null)
|
||||
commandPlugin.statusMessenger.started()
|
||||
}
|
||||
|
||||
qGraph.messengers = allCommandPlugins.filter(_.statusMessenger != null).map(_.statusMessenger).toSeq
|
||||
|
||||
// TODO: Default command plugin argument?
|
||||
val remoteFileConverter = (
|
||||
for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null))
|
||||
yield commandPlugin.remoteFileConverter
|
||||
).headOption.getOrElse(null)
|
||||
|
||||
if (remoteFileConverter != null)
|
||||
loadArgumentsIntoObject(remoteFileConverter)
|
||||
|
||||
val allQScripts = qScriptPluginManager.createAllTypes()
|
||||
for (script <- allQScripts) {
|
||||
logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
|
||||
loadArgumentsIntoObject(script)
|
||||
allCommandPlugins.foreach(_.initScript(script))
|
||||
// TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now.
|
||||
//if (settings.run)
|
||||
script.pullInputs()
|
||||
|
|
@ -137,10 +153,15 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
case e: Exception =>
|
||||
throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e)
|
||||
}
|
||||
|
||||
if (remoteFileConverter != null) {
|
||||
if (remoteFileConverter.convertToRemoteEnabled)
|
||||
script.mkRemoteOutputs(remoteFileConverter)
|
||||
}
|
||||
|
||||
script.functions.foreach(qGraph.add(_))
|
||||
logger.info("Added " + script.functions.size + " functions")
|
||||
}
|
||||
|
||||
// Execute the job graph
|
||||
qGraph.run()
|
||||
|
||||
|
|
@ -162,14 +183,19 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
if (!success) {
|
||||
logger.info("Done with errors")
|
||||
qGraph.logFailed()
|
||||
for (statusMessenger <- allStatusMessengers)
|
||||
statusMessenger.exit("Done with errors")
|
||||
for (commandPlugin <- allCommandPlugins)
|
||||
if (commandPlugin.statusMessenger != null)
|
||||
commandPlugin.statusMessenger.exit("Done with errors: %s".format(qGraph.formattedStatusCounts))
|
||||
1
|
||||
} else {
|
||||
if (settings.run) {
|
||||
allQScripts.foreach(_.pushOutputs())
|
||||
for (statusMessenger <- allStatusMessengers)
|
||||
statusMessenger.done(allQScripts.map(_.remoteOutputs))
|
||||
for (commandPlugin <- allCommandPlugins)
|
||||
if (commandPlugin.statusMessenger != null) {
|
||||
val allInputs = allQScripts.map(_.remoteInputs)
|
||||
val allOutputs = allQScripts.map(_.remoteOutputs)
|
||||
commandPlugin.statusMessenger.done(allInputs, allOutputs)
|
||||
}
|
||||
}
|
||||
0
|
||||
}
|
||||
|
|
@ -189,7 +215,7 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
override def getArgumentSources = {
|
||||
var plugins = Seq.empty[Class[_]]
|
||||
plugins ++= qScriptPluginManager.getPlugins
|
||||
plugins ++= qStatusMessengerPluginManager.getPlugins
|
||||
plugins ++= qCommandPlugin.getPlugins
|
||||
plugins.toArray
|
||||
}
|
||||
|
||||
|
|
@ -200,11 +226,10 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
override def getArgumentSourceName(source: Class[_]) = {
|
||||
if (classOf[QScript].isAssignableFrom(source))
|
||||
qScriptPluginManager.getName(source.asSubclass(classOf[QScript]))
|
||||
else if (classOf[QStatusMessenger].isAssignableFrom(source))
|
||||
qStatusMessengerPluginManager.getName(source.asSubclass(classOf[QStatusMessenger]))
|
||||
else if (classOf[QCommandPlugin].isAssignableFrom(source))
|
||||
qCommandPlugin.getName(source.asSubclass(classOf[QCommandPlugin]))
|
||||
else
|
||||
null
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
package org.broadinstitute.sting.queue
|
||||
|
||||
import engine.QStatusMessenger
|
||||
import util.RemoteFileConverter
|
||||
|
||||
trait QCommandPlugin {
|
||||
def statusMessenger: QStatusMessenger = null
|
||||
def remoteFileConverter: RemoteFileConverter = null
|
||||
def qScriptClass: Class[_ <: QScript] = classOf[QScript]
|
||||
def initScript(script: QScript) {}
|
||||
}
|
||||
|
|
@ -108,6 +108,24 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
|||
functions.foreach( f => add(f) )
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert all @Output files to remote output files.
|
||||
* @param remoteFileConverter Converter for files to remote files.
|
||||
*/
|
||||
def mkRemoteOutputs(remoteFileConverter: RemoteFileConverter) {
|
||||
for (field <- outputFields) {
|
||||
val fieldFile = ClassFieldCache.getFieldFile(this, field)
|
||||
if (fieldFile != null && !fieldFile.isInstanceOf[RemoteFile]) {
|
||||
val fieldName = ClassFieldCache.fullName(field)
|
||||
val remoteFile = remoteFileConverter.convertToRemote(fieldFile, fieldName)
|
||||
ClassFieldCache.setFieldValue(this, field, remoteFile)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull all remote files to the local disk.
|
||||
*/
|
||||
def pullInputs() {
|
||||
val inputs = ClassFieldCache.getFieldFiles(this, inputFields)
|
||||
for (remoteFile <- filterRemoteFiles(inputs)) {
|
||||
|
|
@ -116,6 +134,9 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Push all remote files from the local disk.
|
||||
*/
|
||||
def pushOutputs() {
|
||||
val outputs = ClassFieldCache.getFieldFiles(this, outputFields)
|
||||
for (remoteFile <- filterRemoteFiles(outputs)) {
|
||||
|
|
@ -124,8 +145,25 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
|||
}
|
||||
}
|
||||
|
||||
def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] =
|
||||
outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
|
||||
/**
|
||||
* List out the remote outputs
|
||||
* @return the RemoteFile outputs by argument source
|
||||
*/
|
||||
def remoteInputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(inputFields))
|
||||
|
||||
/**
|
||||
* List out the remote outputs
|
||||
* @return the RemoteFile outputs by argument source
|
||||
*/
|
||||
def remoteOutputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(outputFields))
|
||||
|
||||
private def tagMap(remoteFieldMap: Map[ArgumentSource, Seq[RemoteFile]]): Map[String, Seq[RemoteFile]] = {
|
||||
remoteFieldMap.collect{ case (k, v) => ClassFieldCache.fullName(k) -> v }.toMap
|
||||
}
|
||||
|
||||
private def remoteFieldMap(fields: Seq[ArgumentSource]): Map[ArgumentSource, Seq[RemoteFile]] = {
|
||||
fields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
|
||||
}
|
||||
|
||||
private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] =
|
||||
fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile])
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ import java.io.{OutputStreamWriter, File}
|
|||
*/
|
||||
class QGraph extends Logging {
|
||||
var settings: QGraphSettings = _
|
||||
var messengers: Seq[QStatusMessenger] = Nil
|
||||
|
||||
private def dryRun = !settings.run
|
||||
private var numMissingValues = 0
|
||||
|
|
@ -95,7 +96,7 @@ class QGraph extends Logging {
|
|||
* The settings aren't necessarily available until after this QGraph object has been constructed, so
|
||||
* this function must be called once the QGraphSettings have been filled in.
|
||||
*
|
||||
* @param settings
|
||||
* @param settings QGraphSettings
|
||||
*/
|
||||
def initializeWithSettings(settings: QGraphSettings) {
|
||||
this.settings = settings
|
||||
|
|
@ -430,6 +431,7 @@ class QGraph extends Logging {
|
|||
val edge = readyJobs.head
|
||||
edge.runner = newRunner(edge.function)
|
||||
edge.start()
|
||||
messengers.foreach(_.started(jobShortName(edge.function)))
|
||||
startedJobs += edge
|
||||
readyJobs -= edge
|
||||
logNextStatusCounts = true
|
||||
|
|
@ -465,8 +467,14 @@ class QGraph extends Logging {
|
|||
updateStatus()
|
||||
|
||||
runningJobs.foreach(edge => edge.status match {
|
||||
case RunnerStatus.DONE => doneJobs += edge
|
||||
case RunnerStatus.FAILED => failedJobs += edge
|
||||
case RunnerStatus.DONE => {
|
||||
doneJobs += edge
|
||||
messengers.foreach(_.done(jobShortName(edge.function)))
|
||||
}
|
||||
case RunnerStatus.FAILED => {
|
||||
failedJobs += edge
|
||||
messengers.foreach(_.exit(jobShortName(edge.function), edge.function.jobErrorLines.mkString("%n".format())))
|
||||
}
|
||||
case RunnerStatus.RUNNING => /* do nothing while still running */
|
||||
})
|
||||
|
||||
|
|
@ -493,7 +501,7 @@ class QGraph extends Logging {
|
|||
// incremental
|
||||
if ( logNextStatusCounts && INCREMENTAL_JOBS_REPORT ) {
|
||||
logger.info("Writing incremental jobs reports...")
|
||||
writeJobsReport(false)
|
||||
writeJobsReport(plot = false)
|
||||
}
|
||||
|
||||
readyJobs ++= getReadyJobs
|
||||
|
|
@ -516,9 +524,13 @@ class QGraph extends Logging {
|
|||
private def nextRunningCheck(lastRunningCheck: Long) =
|
||||
((30 * 1000L) - (System.currentTimeMillis - lastRunningCheck))
|
||||
|
||||
def formattedStatusCounts: String = {
|
||||
"%d Pend, %d Run, %d Fail, %d Done".format(
|
||||
statusCounts.pending, statusCounts.running, statusCounts.failed, statusCounts.done)
|
||||
}
|
||||
|
||||
private def logStatusCounts() {
|
||||
logger.info("%d Pend, %d Run, %d Fail, %d Done".format(
|
||||
statusCounts.pending, statusCounts.running, statusCounts.failed, statusCounts.done))
|
||||
logger.info(formattedStatusCounts)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -533,6 +545,16 @@ class QGraph extends Logging {
|
|||
traverseFunctions(edge => recheckDone(edge))
|
||||
}
|
||||
|
||||
// TODO: Yet another field to add (with overloads) to QFunction?
|
||||
private def jobShortName(function: QFunction): String = {
|
||||
var name = function.analysisName
|
||||
if (function.isInstanceOf[CloneFunction]) {
|
||||
val cloneFunction = function.asInstanceOf[CloneFunction]
|
||||
name += " %d of %d".format(cloneFunction.cloneIndex, cloneFunction.cloneCount)
|
||||
}
|
||||
name
|
||||
}
|
||||
|
||||
/**
|
||||
* First pass that checks if an edge is done or if it's an intermediate edge if it can be skipped.
|
||||
* This function may modify the status of previous edges if it discovers that the edge passed in
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.queue.engine
|
||||
|
||||
import org.broadinstitute.sting.commandline.ArgumentSource
|
||||
import org.broadinstitute.sting.queue.util.RemoteFile
|
||||
|
||||
/**
|
||||
|
|
@ -8,6 +7,10 @@ import org.broadinstitute.sting.queue.util.RemoteFile
|
|||
*/
|
||||
trait QStatusMessenger {
|
||||
def started()
|
||||
def done(files: Seq[Map[ArgumentSource, Seq[RemoteFile]]])
|
||||
def done(inputs: Seq[Map[String, Seq[RemoteFile]]], outputs: Seq[Map[String, Seq[RemoteFile]]])
|
||||
def exit(message: String)
|
||||
|
||||
def started(job: String)
|
||||
def done(job: String)
|
||||
def exit(job: String, message: String)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,7 +39,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe
|
|||
private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
|
||||
override def freezeFieldValues() {
|
||||
this.jarFile = this.originalGATK.jarFile
|
||||
this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
|
||||
this.out = this.originalOutput
|
||||
GATKIntervals.copyIntervalArguments(this.originalGATK, this)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.queue.extensions.picard
|
|||
|
||||
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
|
||||
import java.io.File
|
||||
import net.sf.picard.analysis.MetricAccumulationLevel
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
|
|
@ -10,9 +11,8 @@ import java.io.File
|
|||
* Time: 5:59 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
|
||||
class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction {
|
||||
analysisName = "CalculateHsMetrics"
|
||||
javaMainClass = "net.sf.picard.sam.CalculateHsMetrics"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: Seq[File] = Nil
|
||||
|
|
@ -28,33 +28,15 @@ class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCom
|
|||
|
||||
@Argument(doc="Reference file", shortName = "reference", fullName = "reference", required = true)
|
||||
var reference: File = _
|
||||
/*
|
||||
@Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false)
|
||||
var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1;
|
||||
|
||||
@Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false)
|
||||
var SORTING_COLLECTION_SIZE_RATIO: Double = -1
|
||||
*/
|
||||
override def freezeFieldValues() {
|
||||
super.freezeFieldValues()
|
||||
// if (outputIndex == null && output != null)
|
||||
// outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
|
||||
}
|
||||
|
||||
val level = "SAMPLE"
|
||||
val level = MetricAccumulationLevel.SAMPLE
|
||||
|
||||
override def inputBams = input
|
||||
override def outputBam = output
|
||||
//this.sortOrder = null
|
||||
//this.createIndex = Some(true)
|
||||
override def outputFile = output
|
||||
override def commandLine = super.commandLine +
|
||||
required("BAIT_INTERVALS=" + baits) +
|
||||
required("TARGET_INTERVALS=" + targets) +
|
||||
required("REFERENCE_SEQUENCE=" + reference) +
|
||||
optional("METRIC_ACCUMULATION_LEVEL="+level)/*+
|
||||
conditional(REMOVE_DUPLICATES, "REMOVE_DUPLICATES=true") +
|
||||
conditional(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) +
|
||||
conditional(SORTING_COLLECTION_SIZE_RATIO > 0, "SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString) */
|
||||
|
||||
optional("METRIC_ACCUMULATION_LEVEL="+level)
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,9 +10,8 @@ import java.io.File
|
|||
* Time: 10:37 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
|
||||
analysisName = "CalculateGcMetrics"
|
||||
javaMainClass = "net.sf.picard.sam.CalculateGcMetrics"
|
||||
class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction {
|
||||
analysisName = "CollectGcBiasMetrics"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: Seq[File] = Nil
|
||||
|
|
@ -24,8 +23,9 @@ class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaC
|
|||
var reference: File = _
|
||||
|
||||
override def inputBams = input
|
||||
override def outputBam = output
|
||||
override def outputFile = output
|
||||
override def commandLine = super.commandLine +
|
||||
required("SUMMARY_OUTPUT=" + output) +
|
||||
required("CHART_OUTPUT=" + output+".pdf") +
|
||||
required("REFERENCE_SEQUENCE=" + reference) +
|
||||
required("ASSUME_SORTED=true")
|
||||
|
|
|
|||
|
|
@ -10,9 +10,8 @@ import java.io.File
|
|||
* Time: 10:37 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction{
|
||||
analysisName = "CalculateMultipleMetrics"
|
||||
javaMainClass = "net.sf.picard.sam.CalculateMultipleMetrics"
|
||||
class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction{
|
||||
analysisName = "CollectMultipleMetrics"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: Seq[File] = Nil
|
||||
|
|
@ -24,7 +23,7 @@ class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.Jav
|
|||
var reference: File = _
|
||||
|
||||
override def inputBams = input
|
||||
override def outputBam = output
|
||||
override def outputFile = output
|
||||
override def commandLine = super.commandLine +
|
||||
required("REFERENCE_SEQUENCE=" + reference) +
|
||||
required("ASSUME_SORTED=true") +
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
|
||||
import net.sf.samtools.SAMFileReader.ValidationStringency
|
||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||
|
||||
/**
|
||||
* Wraps a Picard function that operates on BAM files but doesn't output a new BAM file (i.e. QC metric files).
|
||||
* See http://picard.sourceforge.net/ for more info.
|
||||
*
|
||||
* Since the various BAM utilities take slightly different arguments
|
||||
* some values are optional.
|
||||
*/
|
||||
trait PicardMetricsFunction extends JavaCommandLineFunction {
|
||||
var validationStringency = ValidationStringency.SILENT
|
||||
var maxRecordsInRam: Option[Int] = None
|
||||
var assumeSorted: Option[Boolean] = None
|
||||
protected def inputBams: Seq[File]
|
||||
protected def outputFile: File
|
||||
|
||||
abstract override def commandLine = super.commandLine +
|
||||
repeat("INPUT=", inputBams, spaceSeparated=false) +
|
||||
required("TMP_DIR=" + jobTempDir) +
|
||||
optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated=false) +
|
||||
optional("OUTPUT=", outputFile, spaceSeparated=false) +
|
||||
optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated=false) +
|
||||
optional("ASSUME_SORTED=", assumeSorted, spaceSeparated=false)
|
||||
}
|
||||
|
|
@ -38,6 +38,7 @@ object CloneFunction {
|
|||
class CloneFunction extends CommandLineFunction {
|
||||
var originalFunction: ScatterGatherableFunction = _
|
||||
var cloneIndex: Int = _
|
||||
var cloneCount: Int = _
|
||||
|
||||
private var overriddenFields = Map.empty[ArgumentSource, Any]
|
||||
private var withScatterPartCount = 0
|
||||
|
|
|
|||
|
|
@ -176,6 +176,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
cloneFunction.originalFunction = this
|
||||
cloneFunction.analysisName = this.analysisName
|
||||
cloneFunction.cloneIndex = i
|
||||
cloneFunction.cloneCount = numClones
|
||||
cloneFunction.commandDirectory = this.scatterGatherTempDir(dirFormat.format(i))
|
||||
cloneFunction.jobOutputFile = if (IOUtils.isSpecialFile(this.jobOutputFile)) this.jobOutputFile else new File(this.jobOutputFile.getName)
|
||||
if (this.jobErrorFile != null)
|
||||
|
|
|
|||
|
|
@ -180,4 +180,15 @@ object ClassFieldCache {
|
|||
case unknown => throw new QException("Non-file found. Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown))
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// other utilities
|
||||
//
|
||||
|
||||
/**
|
||||
* Retrieves the fullName of the argument
|
||||
* @param field ArgumentSource to check
|
||||
* @return Full name of the argument source
|
||||
*/
|
||||
def fullName(field: ArgumentSource) = field.createArgumentDefinitions().get(0).fullName
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
package org.broadinstitute.sting.queue.util
|
||||
|
||||
import java.io.File
|
||||
|
||||
trait RemoteFileConverter {
|
||||
type RemoteFileType <: RemoteFile
|
||||
|
||||
/**
|
||||
* If this remote file creator is capable of converting to a remote file.
|
||||
* @return true if ready to convert
|
||||
*/
|
||||
def convertToRemoteEnabled: Boolean
|
||||
|
||||
/**
|
||||
* Converts to a remote file
|
||||
* @param file The original file
|
||||
* @param name A "name" to use for the remote file
|
||||
* @return The new version of this remote file.
|
||||
*/
|
||||
def convertToRemote(file: File, name: String): RemoteFileType
|
||||
}
|
||||
Loading…
Reference in New Issue