This commit is contained in:
Mark DePristo 2011-09-09 15:44:47 -04:00
parent 3c8445b934
commit 72536e5d6d
3 changed files with 522 additions and 552 deletions

View File

@ -855,8 +855,8 @@
<jvmarg value="-Dpipeline.run=${pipeline.run}" /> <jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" /> <jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/> <jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> --> <jvmarg value="-Xdebug"/>
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> --> <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/>
<classpath> <classpath>
<path refid="external.dependencies" /> <path refid="external.dependencies" />
<pathelement location="${java.classes}" /> <pathelement location="${java.classes}" />

View File

@ -333,28 +333,6 @@ public class IntervalUtils {
throw new UserException.BadArgumentValue("scatterParts", String.format("Only able to write contigs into %d of %d files.", fileIndex + 1, scatterParts.size())); throw new UserException.BadArgumentValue("scatterParts", String.format("Only able to write contigs into %d of %d files.", fileIndex + 1, scatterParts.size()));
} }
/**
 * Splits an interval list into multiple contiguous sublists.
 *
 * Each entry of splits is used as an exclusive stop index into locs: the first sublist
 * holds the elements at indices [0, splits[0]), the next one the elements at indices
 * [splits[0], splits[1]), and so on. A stop point that is not greater than the previous
 * one produces an empty sublist.
 *
 * @param locs The genome locs to split.
 * @param splits The stop points for the genome locs returned by splitFixedIntervals.
 * @return A list of lists of genome locs, split according to splits
 * @throws IndexOutOfBoundsException if a stop point reaches past the end of locs
 */
public static List<List<GenomeLoc>> splitIntervalsToSubLists(List<GenomeLoc> locs, List<Integer> splits) {
    final List<List<GenomeLoc>> sublists = new ArrayList<List<GenomeLoc>>(splits.size());
    int start = 0;
    for (final int stop : splits) {
        final List<GenomeLoc> curList = new ArrayList<GenomeLoc>();
        for (int i = start; i < stop; i++) {
            curList.add(locs.get(i));
        }
        // The next sublist begins exactly where this one stopped.
        start = stop;
        sublists.add(curList);
    }
    return sublists;
}
/** /**
* Splits an interval list into multiple files. * Splits an interval list into multiple files.
* @param fileHeader The sam file header. * @param fileHeader The sam file header.
@ -384,39 +362,27 @@ public class IntervalUtils {
public static List<List<GenomeLoc>> splitFixedIntervals(List<GenomeLoc> locs, int numParts) { public static List<List<GenomeLoc>> splitFixedIntervals(List<GenomeLoc> locs, int numParts) {
if (locs.size() < numParts) if (locs.size() < numParts)
throw new UserException.BadArgumentValue("scatterParts", String.format("Cannot scatter %d locs into %d parts.", locs.size(), numParts)); throw new UserException.BadArgumentValue("scatterParts", String.format("Cannot scatter %d locs into %d parts.", locs.size(), numParts));
final long locsSize = intervalSize(locs); final long locsSize = intervalSize(locs);
final List<Integer> splitPoints = new ArrayList<Integer>(); final double idealSplitSize = locsSize / numParts;
addFixedSplit(splitPoints, locs, locsSize, 0, locs.size(), numParts); final List<List<GenomeLoc>> splits = new ArrayList<List<GenomeLoc>>(numParts);
Collections.sort(splitPoints); final LinkedList<GenomeLoc> remainingLocs = new LinkedList<GenomeLoc>(locs);
splitPoints.add(locs.size());
return splitIntervalsToSubLists(locs, splitPoints);
}
private static void addFixedSplit(List<Integer> splitPoints, List<GenomeLoc> locs, long locsSize, int startIndex, int stopIndex, int numParts) { for ( int i = 0; i < numParts; i++ ) {
if (numParts < 2)
return;
int halfParts = (numParts + 1) / 2;
Pair<Integer, Long> splitPoint = getFixedSplit(locs, locsSize, startIndex, stopIndex, halfParts, numParts - halfParts);
int splitIndex = splitPoint.first;
long splitSize = splitPoint.second;
splitPoints.add(splitIndex);
addFixedSplit(splitPoints, locs, splitSize, startIndex, splitIndex, halfParts);
addFixedSplit(splitPoints, locs, locsSize - splitSize, splitIndex, stopIndex, numParts - halfParts);
}
private static Pair<Integer, Long> getFixedSplit(List<GenomeLoc> locs, long locsSize, int startIndex, int stopIndex, int minLocs, int maxLocs) {
int splitIndex = startIndex;
long splitSize = 0; long splitSize = 0;
for (int i = 0; i < minLocs; i++) { List<GenomeLoc> split = new ArrayList<GenomeLoc>();
splitSize += locs.get(splitIndex).size(); while ( ! remainingLocs.isEmpty() ) {
splitIndex++; final GenomeLoc toAdd = remainingLocs.pop();
splitSize += toAdd.size();
split.add(toAdd);
final long nextEltSize = remainingLocs.isEmpty() ? 0 : remainingLocs.peek().size();
if ( splitSize + (i % 2 == 0 ? 0 : nextEltSize) > idealSplitSize )
break;
} }
long halfSize = locsSize / 2; splits.add(split);
while (splitIndex < (stopIndex - maxLocs) && splitSize < halfSize) {
splitSize += locs.get(splitIndex).size();
splitIndex++;
} }
return new Pair<Integer, Long>(splitIndex, splitSize);
return splits;
} }
/** /**