V3 of the distributed GATK. High-efficiency implementation. Support for status tracking for debugging and display. Still not safe for production use due to an NFS file-lock problem. V4 will use an alternative file-locking mechanism.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5063 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fd864e8e3a
commit
c50f39a147
|
|
@ -1,8 +1,8 @@
|
||||||
# todo -- add replicate number to system
|
d <- read.table("results.new.dat", header=T)
|
||||||
# todo -- add scatter gather comparison
|
|
||||||
d <- read.table("results.dat", header=T)
|
|
||||||
require("lattice")
|
require("lattice")
|
||||||
|
|
||||||
|
plot1 <- function(d, name) {
|
||||||
|
d = subset(d, dataset == name)
|
||||||
subd = data.frame(parallel.type=d$parallel.type, nWaysParallel=d$nWaysParallel, end.to.end.time=d$end.to.end.time,per.1M.sites = d$per.1M.sites, job.run.time = d$job.run.time)
|
subd = data.frame(parallel.type=d$parallel.type, nWaysParallel=d$nWaysParallel, end.to.end.time=d$end.to.end.time,per.1M.sites = d$per.1M.sites, job.run.time = d$job.run.time)
|
||||||
|
|
||||||
nways = unique(subd$nWaysParallel)
|
nways = unique(subd$nWaysParallel)
|
||||||
|
|
@ -18,5 +18,8 @@ subd = rbind(subd, theo)
|
||||||
|
|
||||||
print(summary(subd))
|
print(summary(subd))
|
||||||
|
|
||||||
print(xyplot(end.to.end.time + per.1M.sites + job.run.time ~ nWaysParallel, data=subd[order(subd$nWaysParallel),], group=parallel.type, type="b", outer=T, scale=list(relation="free"), auto.key=T, lwd=c(2,2,1)))
|
print(xyplot(log10(end.to.end.time) + per.1M.sites + log10(job.run.time) ~ log2(nWaysParallel), data=subd[order(subd$nWaysParallel),], group=parallel.type, type="b", outer=T, scale=list(relation="free"), auto.key=T, lwd=c(2,2,1), main=name))
|
||||||
|
}
|
||||||
|
|
||||||
|
plot1(d, "NA12878Trio.WEx")
|
||||||
|
plot1(d, "NA12878.HiSeq")
|
||||||
|
|
|
||||||
|
|
@ -13,16 +13,22 @@ class DistributedGATKPerformance extends QScript {
|
||||||
@Argument(shortName="outputDir", doc="output directory", required=false)
|
@Argument(shortName="outputDir", doc="output directory", required=false)
|
||||||
var outputDir: String = ""
|
var outputDir: String = ""
|
||||||
|
|
||||||
@Argument(shortName="dataset", doc="selects the datasets to run. If not provided, all datasets will be used", required=false)
|
@Argument(shortName="dataset", doc="selects the datasets to run. If not provided, all datasets will be used", required=true)
|
||||||
var datasets: List[String] = Nil
|
var datasets: List[String] = Nil
|
||||||
|
|
||||||
|
@Argument(shortName="waysParallel", doc="selects the datasets to run. If not provided, all datasets will be used", required=false)
|
||||||
|
var waysParallelArg: List[Int] = Nil
|
||||||
|
|
||||||
@Argument(shortName="long", doc="runs long calculations", required=false)
|
@Argument(shortName="long", doc="runs long calculations", required=false)
|
||||||
var long: Boolean = false
|
var long: Boolean = false
|
||||||
|
|
||||||
|
@Argument(shortName="test", doc="runs long calculations", required=false)
|
||||||
|
var test: Boolean = false
|
||||||
|
|
||||||
//@Argument(shortName="noBAQ", doc="turns off BAQ calculation", required=false)
|
//@Argument(shortName="noBAQ", doc="turns off BAQ calculation", required=false)
|
||||||
var noBAQ: Boolean = false
|
var noBAQ: Boolean = false
|
||||||
|
|
||||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile; memoryLimit = Some(2); }
|
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "DEBUG"; jarFile = gatkJarFile; memoryLimit = Some(2); }
|
||||||
|
|
||||||
class Target(
|
class Target(
|
||||||
val baseName: String,
|
val baseName: String,
|
||||||
|
|
@ -102,7 +108,7 @@ class DistributedGATKPerformance extends QScript {
|
||||||
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, lowPass),
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, lowPass),
|
||||||
"WExTrio" -> new Target("NA12878Trio.WEx", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"WExTrio" -> new Target("NA12878Trio.WEx", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/gsa-scr1/carneiro/prj/trio/NA12878Trio.WEx.hg19.bam"),
|
new File("/humgen/gsa-scr1/carneiro/prj/trio/data/NA12878Trio.WEx.hg19.recal.bam"),
|
||||||
new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, !lowPass)
|
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, !lowPass)
|
||||||
)
|
)
|
||||||
|
|
@ -111,24 +117,25 @@ class DistributedGATKPerformance extends QScript {
|
||||||
|
|
||||||
// Selects the datasets in the -dataset argument and adds them to targets.
|
// Selects the datasets in the -dataset argument and adds them to targets.
|
||||||
var targets: List[Target] = List()
|
var targets: List[Target] = List()
|
||||||
if (!datasets.isEmpty)
|
|
||||||
for (ds <- datasets)
|
for (ds <- datasets)
|
||||||
targets ::= targetDataSets(ds) // Could check if ds was misspelled, but this way an exception will be thrown, maybe it's better this way?
|
targets ::= targetDataSets(ds) // Could check if ds was misspelled, but this way an exception will be thrown, maybe it's better this way?
|
||||||
else // If -dataset is not specified, all datasets are used.
|
|
||||||
for (targetDS <- targetDataSets.valuesIterator) // for Scala 2.7 or older, use targetDataSets.values
|
|
||||||
targets ::= targetDS
|
|
||||||
|
|
||||||
val nWays = if (long) List(1, 2, 4) else List(8, 16, 32, 64, 96)
|
var nWays = if (long) List(1, 2, 4, 8) else List(16, 32, 64, 96)
|
||||||
|
if ( ! waysParallelArg.isEmpty )
|
||||||
|
nWays = waysParallelArg
|
||||||
|
|
||||||
//val nWays = List(2)
|
//val nWays = List(2)
|
||||||
|
|
||||||
for (target <- targets) {
|
for (target <- targets) {
|
||||||
for ( scatterP <- List(true, false) )
|
for ( scatterP <- if ( test ) List(false) else List(true, false) )
|
||||||
for (nWaysParallel <- nWays) {
|
for (nWaysParallel <- if ( test ) List(32) else nWays) {
|
||||||
val aname = "ptype_%s.nways_%d".format(if ( scatterP ) "sg" else "dist", nWaysParallel)
|
val aname = "ptype_%s.nways_%d".format(if ( scatterP ) "sg" else "dist", nWaysParallel)
|
||||||
|
|
||||||
def addUG(ug: UnifiedGenotyper) = {
|
def addUG(ug: UnifiedGenotyper) = {
|
||||||
if ( ! long )
|
if ( ! long )
|
||||||
ug.jobLimitSeconds = Some(60 * 60 * 4)
|
ug.jobLimitSeconds = Some(60 * 60 * 4)
|
||||||
|
if ( test )
|
||||||
|
ug.jobLimitSeconds = Some(60 * 30)
|
||||||
add(ug);
|
add(ug);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -141,10 +148,14 @@ class DistributedGATKPerformance extends QScript {
|
||||||
} else {
|
} else {
|
||||||
for ( part <- 1 to nWaysParallel) {
|
for ( part <- 1 to nWaysParallel) {
|
||||||
var ug: UnifiedGenotyper = new UnifiedGenotyper(target, aname + ".part" + part)
|
var ug: UnifiedGenotyper = new UnifiedGenotyper(target, aname + ".part" + part)
|
||||||
|
if ( target.name.equals("NA12878.HiSeq"))
|
||||||
ug.intervalsString ++= List(CHROMOSOME)
|
ug.intervalsString ++= List(CHROMOSOME)
|
||||||
|
else
|
||||||
|
ug.intervalsString ++= List(target.intervals)
|
||||||
ug.processingTracker = new File(target.name + "." + aname + ".distributed.txt")
|
ug.processingTracker = new File(target.name + "." + aname + ".distributed.txt")
|
||||||
if ( part == 1 )
|
if ( part == 1 )
|
||||||
ug.performanceLog = new File("%s.%s.pf.log".format(target.name, aname))
|
ug.performanceLog = new File("%s.%s.pf.log".format(target.name, aname))
|
||||||
|
ug.processingTrackerStatusFile = new File("%s.%s.%d.ptstatus.log".format(target.name, aname, part))
|
||||||
addUG(ug)
|
addUG(ug)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -212,6 +212,11 @@ public class GATKArgumentCollection {
|
||||||
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||||
public List<String> readGroupBlackList = null;
|
public List<String> readGroupBlackList = null;
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// distributed GATK arguments
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
@Element(required=false)
|
@Element(required=false)
|
||||||
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
|
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
|
||||||
@Hidden
|
@Hidden
|
||||||
|
|
@ -222,6 +227,17 @@ public class GATKArgumentCollection {
|
||||||
@Hidden
|
@Hidden
|
||||||
public boolean restartProcessingTracker = false;
|
public boolean restartProcessingTracker = false;
|
||||||
|
|
||||||
|
@Element(required=false)
|
||||||
|
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false)
|
||||||
|
@Hidden
|
||||||
|
public File processingTrackerStatusFile = null;
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// methods
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* marshal the data out to an object
|
* marshal the data out to an object
|
||||||
*
|
*
|
||||||
|
|
@ -387,6 +403,10 @@ public class GATKArgumentCollection {
|
||||||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
|
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
|
||||||
|
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
|
||||||
|
return false;
|
||||||
|
|
||||||
if ( restartProcessingTracker != other.restartProcessingTracker )
|
if ( restartProcessingTracker != other.restartProcessingTracker )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,10 @@ import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.PrintStream;
|
||||||
import java.lang.management.ManagementFactory;
|
import java.lang.management.ManagementFactory;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -167,8 +171,17 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile);
|
logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
processingTracker = GenomeLocProcessingTracker.createFileBackedDistributed(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser());
|
PrintStream statusStream = null;
|
||||||
logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile);
|
if ( engine.getArguments().processingTrackerStatusFile != null ) {
|
||||||
|
try {
|
||||||
|
statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile));
|
||||||
|
} catch ( FileNotFoundException e) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
processingTracker = GenomeLocProcessingTracker.createFileBackedDistributed(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), false, statusStream);
|
||||||
|
logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName());
|
||||||
} else {
|
} else {
|
||||||
processingTracker = GenomeLocProcessingTracker.createNoOp();
|
processingTracker = GenomeLocProcessingTracker.createNoOp();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,5 +11,6 @@ import java.util.concurrent.locks.ReentrantLock;
|
||||||
* Simple extension of a ReentrantLock that supports a close method
|
* Simple extension of a ReentrantLock that supports a close method
|
||||||
*/
|
*/
|
||||||
public class ClosableReentrantLock extends ReentrantLock {
|
public class ClosableReentrantLock extends ReentrantLock {
|
||||||
|
public boolean ownsLock() { return super.isHeldByCurrentThread(); }
|
||||||
public void close() {}
|
public void close() {}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,8 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
import java.io.FileNotFoundException;
|
import java.nio.ByteBuffer;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.RandomAccessFile;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
@ -20,34 +18,50 @@ import java.util.concurrent.locks.ReentrantLock;
|
||||||
* Keeps a copy of the processing locks in a file, in addition to tracking in memory via the base class
|
* Keeps a copy of the processing locks in a file, in addition to tracking in memory via the base class
|
||||||
*/
|
*/
|
||||||
public class FileBackedGenomeLocProcessingTracker extends GenomeLocProcessingTracker {
|
public class FileBackedGenomeLocProcessingTracker extends GenomeLocProcessingTracker {
|
||||||
private static Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class);
|
private static final Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class);
|
||||||
private static final boolean DEBUG = false;
|
private static final boolean DEBUG = false;
|
||||||
private File sharedFile = null;
|
private static final String READ_MODE = "r";
|
||||||
private GenomeLocParser parser;
|
private static final String WRITE_MODE = "rws";
|
||||||
private RandomAccessFile raFile;
|
|
||||||
|
private final File sharedFile;
|
||||||
|
private final GenomeLocParser parser;
|
||||||
private long lastReadPosition = 0;
|
private long lastReadPosition = 0;
|
||||||
|
|
||||||
protected FileBackedGenomeLocProcessingTracker(File sharedFile, RandomAccessFile raFile, GenomeLocParser parser, ClosableReentrantLock lock) {
|
protected FileBackedGenomeLocProcessingTracker(File sharedFile, GenomeLocParser parser, ClosableReentrantLock lock, PrintStream status) {
|
||||||
super(lock);
|
super(lock, status);
|
||||||
|
|
||||||
this.sharedFile = sharedFile;
|
this.sharedFile = sharedFile;
|
||||||
this.raFile = raFile;
|
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void close() {
|
private RandomAccessFile openFile(String mode) {
|
||||||
super.close();
|
|
||||||
try {
|
try {
|
||||||
raFile.close();
|
return new RandomAccessFile(sharedFile, mode);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void closeFile(RandomAccessFile raFile) {
|
||||||
|
try {
|
||||||
|
if ( raFile != null ) raFile.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// protected void close() {
|
||||||
|
// super.close();
|
||||||
|
// }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<ProcessingLoc> readNewLocs() {
|
protected List<ProcessingLoc> readNewLocs() {
|
||||||
List<ProcessingLoc> newPLocs = new ArrayList<ProcessingLoc>(); // todo -- gratuitous object creation
|
List<ProcessingLoc> newPLocs = new ArrayList<ProcessingLoc>(); // todo -- gratuitous object creation
|
||||||
|
|
||||||
|
if ( sharedFile.exists() ) {
|
||||||
|
RandomAccessFile raFile = null;
|
||||||
try {
|
try {
|
||||||
|
raFile = openFile(READ_MODE);
|
||||||
//logger.warn(String.format("Reading new locs at: file.length=%d last=%d", raFile.length(), lastReadPosition));
|
//logger.warn(String.format("Reading new locs at: file.length=%d last=%d", raFile.length(), lastReadPosition));
|
||||||
if ( raFile.length() > lastReadPosition ) {
|
if ( raFile.length() > lastReadPosition ) {
|
||||||
raFile.seek(lastReadPosition);
|
raFile.seek(lastReadPosition);
|
||||||
|
|
@ -71,6 +85,9 @@ public class FileBackedGenomeLocProcessingTracker extends GenomeLocProcessingTra
|
||||||
throw new UserException.CouldNotReadInputFile(sharedFile, e);
|
throw new UserException.CouldNotReadInputFile(sharedFile, e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new ReviewedStingException("Couldn't read sharedFile " + sharedFile, e);
|
throw new ReviewedStingException("Couldn't read sharedFile " + sharedFile, e);
|
||||||
|
} finally {
|
||||||
|
closeFile(raFile);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return newPLocs;
|
return newPLocs;
|
||||||
|
|
@ -78,18 +95,26 @@ public class FileBackedGenomeLocProcessingTracker extends GenomeLocProcessingTra
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void registerNewLocs(Collection<ProcessingLoc> plocs) {
|
protected void registerNewLocs(Collection<ProcessingLoc> plocs) {
|
||||||
|
RandomAccessFile raFile = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
raFile = openFile(WRITE_MODE);
|
||||||
long startPos = raFile.getFilePointer();
|
long startPos = raFile.getFilePointer();
|
||||||
raFile.seek(raFile.length());
|
raFile.seek(raFile.length());
|
||||||
|
StringBuffer bytes = new StringBuffer();
|
||||||
for ( ProcessingLoc ploc : plocs ) {
|
for ( ProcessingLoc ploc : plocs ) {
|
||||||
String packet = String.format("%s %s%n", ploc.getLocation(), ploc.getOwner());
|
String packet = String.format("%s %s%n", ploc.getLocation(), ploc.getOwner());
|
||||||
raFile.write(packet.getBytes());
|
bytes.append(packet);
|
||||||
if ( DEBUG ) logger.warn(String.format("Wrote loc %s to file: %d + %d bytes ending at %d", ploc, startPos, packet.length(), raFile.getFilePointer()));
|
if ( DEBUG ) logger.warn(String.format("Wrote loc %s to file: %d + %d bytes ending at %d", ploc, startPos, packet.length(), raFile.getFilePointer()));
|
||||||
}
|
}
|
||||||
|
raFile.write(bytes.toString().getBytes());
|
||||||
|
//raFile.getChannel().force(true);
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
||||||
|
} finally {
|
||||||
|
closeFile(raFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintStream;
|
||||||
import java.io.RandomAccessFile;
|
import java.io.RandomAccessFile;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
|
|
@ -18,9 +20,17 @@ import java.util.concurrent.locks.ReentrantLock;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public abstract class GenomeLocProcessingTracker {
|
public abstract class GenomeLocProcessingTracker {
|
||||||
private static Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class);
|
private final static Logger logger = Logger.getLogger(FileBackedGenomeLocProcessingTracker.class);
|
||||||
private Map<GenomeLoc, ProcessingLoc> processingLocs;
|
private final static SimpleDateFormat STATUS_FORMAT = new SimpleDateFormat("HH:mm:ss,SSS");
|
||||||
private ClosableReentrantLock lock;
|
private final static int DEFAULT_OWNERSHIP_ITERATOR_SIZE = 100;
|
||||||
|
|
||||||
|
private final static String GOING_FOR_LOCK = "going_for_lock";
|
||||||
|
private final static String HAVE_LOCK = "have_lock";
|
||||||
|
private final static String RUNNING = "running";
|
||||||
|
|
||||||
|
private final Map<GenomeLoc, ProcessingLoc> processingLocs;
|
||||||
|
private final ClosableReentrantLock lock;
|
||||||
|
private final PrintStream status;
|
||||||
|
|
||||||
protected SimpleTimer writeTimer = new SimpleTimer("writeTimer");
|
protected SimpleTimer writeTimer = new SimpleTimer("writeTimer");
|
||||||
protected SimpleTimer readTimer = new SimpleTimer("readTimer");
|
protected SimpleTimer readTimer = new SimpleTimer("readTimer");
|
||||||
|
|
@ -41,24 +51,18 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
return new SharedMemoryGenomeLocProcessingTracker(new ClosableReentrantLock());
|
return new SharedMemoryGenomeLocProcessingTracker(new ClosableReentrantLock());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GenomeLocProcessingTracker createFileBackedThreaded(File sharedFile, GenomeLocParser parser) {
|
public static GenomeLocProcessingTracker createFileBackedThreaded(File sharedFile, GenomeLocParser parser, PrintStream status) {
|
||||||
return createFileBacked(sharedFile, parser, false);
|
return createFileBacked(sharedFile, parser, false, false, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GenomeLocProcessingTracker createFileBackedDistributed(File sharedFile, GenomeLocParser parser) {
|
public static GenomeLocProcessingTracker createFileBackedDistributed(File sharedFile, GenomeLocParser parser, boolean blockingP, PrintStream status) {
|
||||||
return createFileBacked(sharedFile, parser, true);
|
return createFileBacked(sharedFile, parser, blockingP, true, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static FileBackedGenomeLocProcessingTracker createFileBacked(File sharedFile, GenomeLocParser parser, boolean useFileLockToo) {
|
private static FileBackedGenomeLocProcessingTracker createFileBacked(File sharedFile, GenomeLocParser parser, boolean blockP, boolean useFileLockToo, PrintStream status) {
|
||||||
try {
|
|
||||||
//logger.warn("Creating file backed GLPT at " + sharedFile);
|
//logger.warn("Creating file backed GLPT at " + sharedFile);
|
||||||
RandomAccessFile raFile = new RandomAccessFile(sharedFile, "rws");
|
ClosableReentrantLock lock = useFileLockToo ? new SharedFileThreadSafeLock(sharedFile, blockP) : new ClosableReentrantLock();
|
||||||
ClosableReentrantLock lock = useFileLockToo ? new SharedFileThreadSafeLock(raFile.getChannel()) : new ClosableReentrantLock();
|
return new FileBackedGenomeLocProcessingTracker(sharedFile, parser, lock, status);
|
||||||
return new FileBackedGenomeLocProcessingTracker(sharedFile, raFile, parser, lock);
|
|
||||||
}
|
|
||||||
catch (FileNotFoundException e) {
|
|
||||||
throw new UserException.CouldNotCreateOutputFile(sharedFile, e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
@ -66,9 +70,11 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
// Creating ProcessingTrackers
|
// Creating ProcessingTrackers
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
public GenomeLocProcessingTracker(ClosableReentrantLock lock) {
|
public GenomeLocProcessingTracker(ClosableReentrantLock lock, PrintStream status) {
|
||||||
processingLocs = new HashMap<GenomeLoc, ProcessingLoc>();
|
this.processingLocs = new HashMap<GenomeLoc, ProcessingLoc>();
|
||||||
|
this.status = status;
|
||||||
this.lock = lock;
|
this.lock = lock;
|
||||||
|
printStatusHeader();
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
@ -84,16 +90,16 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
* @param loc
|
* @param loc
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public final boolean locIsOwned(GenomeLoc loc) {
|
public final boolean locIsOwned(GenomeLoc loc, String id) {
|
||||||
return findOwner(loc) != null;
|
return findOwner(loc, id) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final ProcessingLoc findOwner(GenomeLoc loc) {
|
protected final ProcessingLoc findOwner(GenomeLoc loc, String id) {
|
||||||
// fast path to check if we already have the existing genome loc in memory for ownership claims
|
// fast path to check if we already have the existing genome loc in memory for ownership claims
|
||||||
// getProcessingLocs() may be expensive [reading from disk, for example] so we shouldn't call it
|
// getProcessingLocs() may be expensive [reading from disk, for example] so we shouldn't call it
|
||||||
// unless necessary
|
// unless necessary
|
||||||
ProcessingLoc x = findOwnerInMap(loc, processingLocs);
|
ProcessingLoc x = findOwnerInMap(loc, processingLocs);
|
||||||
return x == null ? findOwnerInMap(loc, updateLocs()) : x;
|
return x == null ? findOwnerInMap(loc, updateLocs(id)) : x;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -110,19 +116,19 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
public final ProcessingLoc claimOwnership(GenomeLoc loc, String myName) {
|
public final ProcessingLoc claimOwnership(GenomeLoc loc, String myName) {
|
||||||
// processingLocs is a shared memory synchronized object, and this
|
// processingLocs is a shared memory synchronized object, and this
|
||||||
// method is synchronized, so we can just do our processing
|
// method is synchronized, so we can just do our processing
|
||||||
lock();
|
lock(myName);
|
||||||
try {
|
try {
|
||||||
ProcessingLoc owner = findOwner(loc);
|
ProcessingLoc owner = findOwner(loc, myName);
|
||||||
|
|
||||||
if ( owner == null ) { // we are unowned
|
if ( owner == null ) { // we are unowned
|
||||||
owner = new ProcessingLoc(loc, myName);
|
owner = new ProcessingLoc(loc, myName);
|
||||||
registerNewLocsWithTimers(Arrays.asList(owner));
|
registerNewLocsWithTimers(Arrays.asList(owner), myName);
|
||||||
}
|
}
|
||||||
|
|
||||||
return owner;
|
return owner;
|
||||||
//logger.warn(String.format("%s.claimOwnership(%s,%s) => %s", this, loc, myName, owner));
|
//logger.warn(String.format("%s.claimOwnership(%s,%s) => %s", this, loc, myName, owner));
|
||||||
} finally {
|
} finally {
|
||||||
unlock();
|
unlock(myName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -154,7 +160,7 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
private final int cacheSize;
|
private final int cacheSize;
|
||||||
|
|
||||||
public OwnershipIterator(Iterator<T> subit, String myName) {
|
public OwnershipIterator(Iterator<T> subit, String myName) {
|
||||||
this(subit, myName, 10);
|
this(subit, myName, DEFAULT_OWNERSHIP_ITERATOR_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public OwnershipIterator(Iterator<T> subit, String myName, int cacheSize) {
|
public OwnershipIterator(Iterator<T> subit, String myName, int cacheSize) {
|
||||||
|
|
@ -185,10 +191,11 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
return elt;
|
return elt;
|
||||||
else {
|
else {
|
||||||
// cache is empty, we need to fill up the cache and return the first element of the queue
|
// cache is empty, we need to fill up the cache and return the first element of the queue
|
||||||
lock();
|
lock(myName);
|
||||||
try {
|
try {
|
||||||
|
|
||||||
// read once the database of owners at the start
|
// read once the database of owners at the start
|
||||||
updateLocs();
|
updateLocs(myName);
|
||||||
|
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
Queue<ProcessingLoc> pwns = new LinkedList<ProcessingLoc>(); // ;-)
|
Queue<ProcessingLoc> pwns = new LinkedList<ProcessingLoc>(); // ;-)
|
||||||
|
|
@ -208,7 +215,7 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
// if not, we continue our search
|
// if not, we continue our search
|
||||||
}
|
}
|
||||||
|
|
||||||
registerNewLocsWithTimers(pwns);
|
registerNewLocsWithTimers(pwns, myName);
|
||||||
|
|
||||||
// we've either filled up the cache or run out of elements. Either way we return
|
// we've either filled up the cache or run out of elements. Either way we return
|
||||||
// the first element of the cache. If the cache is empty, we return null here.
|
// the first element of the cache. If the cache is empty, we return null here.
|
||||||
|
|
@ -217,7 +224,7 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
|
|
||||||
return cache.poll();
|
return cache.poll();
|
||||||
} finally {
|
} finally {
|
||||||
unlock();
|
unlock(myName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -240,12 +247,12 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
protected final Collection<ProcessingLoc> getProcessingLocs() {
|
protected final Collection<ProcessingLoc> getProcessingLocs(String myName) {
|
||||||
return updateLocs().values();
|
return updateLocs(myName).values();
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Map<GenomeLoc, ProcessingLoc> updateLocs() {
|
private final Map<GenomeLoc, ProcessingLoc> updateLocs(String myName) {
|
||||||
lock();
|
lock(myName);
|
||||||
try {
|
try {
|
||||||
readTimer.restart();
|
readTimer.restart();
|
||||||
for ( ProcessingLoc p : readNewLocs() )
|
for ( ProcessingLoc p : readNewLocs() )
|
||||||
|
|
@ -254,11 +261,11 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
nReads++;
|
nReads++;
|
||||||
return processingLocs;
|
return processingLocs;
|
||||||
} finally {
|
} finally {
|
||||||
unlock();
|
unlock(myName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final void registerNewLocsWithTimers(Collection<ProcessingLoc> plocs) {
|
protected final void registerNewLocsWithTimers(Collection<ProcessingLoc> plocs, String myName) {
|
||||||
writeTimer.restart();
|
writeTimer.restart();
|
||||||
registerNewLocs(plocs);
|
registerNewLocs(plocs);
|
||||||
nWrites++;
|
nWrites++;
|
||||||
|
|
@ -270,17 +277,37 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
// Low-level accessors / manipulators and utility functions
|
// Low-level accessors / manipulators and utility functions
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
private final boolean hasStatus() {
|
||||||
private final void lock() {
|
return status != null;
|
||||||
lockWaitTimer.restart();
|
|
||||||
if ( ! lock.isHeldByCurrentThread() )
|
|
||||||
nLocks++;
|
|
||||||
lock.lock();
|
|
||||||
lockWaitTimer.stop();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void unlock() {
|
private final void printStatusHeader() {
|
||||||
|
if ( hasStatus() ) status.printf("process.id\thr.time\ttime\tstate%n");
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void printStatus(String id, long machineTime, String state) {
|
||||||
|
// prints a line like processID human-readable-time machine-time state
|
||||||
|
if ( hasStatus() ) {
|
||||||
|
status.printf("%s\t%s\t%d\t%s%n", id, STATUS_FORMAT.format(machineTime), machineTime, state);
|
||||||
|
status.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void lock(String id) {
|
||||||
|
lockWaitTimer.restart();
|
||||||
|
boolean hadLock = lock.ownsLock();
|
||||||
|
if ( ! hadLock ) {
|
||||||
|
nLocks++;
|
||||||
|
printStatus(id, lockWaitTimer.currentTime(), GOING_FOR_LOCK);
|
||||||
|
}
|
||||||
|
lock.lock();
|
||||||
|
lockWaitTimer.stop();
|
||||||
|
if ( ! hadLock ) printStatus(id, lockWaitTimer.currentTime(), HAVE_LOCK);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void unlock(String id) {
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
if ( ! lock.ownsLock() ) printStatus(id, lockWaitTimer.currentTime(), RUNNING);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final static ProcessingLoc findOwnerInCollection(GenomeLoc loc, Collection<ProcessingLoc> locs) {
|
protected final static ProcessingLoc findOwnerInCollection(GenomeLoc loc, Collection<ProcessingLoc> locs) {
|
||||||
|
|
@ -312,8 +339,8 @@ public abstract class GenomeLocProcessingTracker {
|
||||||
|
|
||||||
protected void close() {
|
protected void close() {
|
||||||
lock.close();
|
lock.close();
|
||||||
logger.warn("Locking events: " + nLocks);
|
if ( hasStatus() ) status.close();
|
||||||
// by default we don't do anything
|
//logger.warn("Locking events: " + nLocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract void registerNewLocs(Collection<ProcessingLoc> plocs);
|
protected abstract void registerNewLocs(Collection<ProcessingLoc> plocs);
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class NoOpGenomeLocProcessingTracker extends GenomeLocProcessingTracker {
|
public class NoOpGenomeLocProcessingTracker extends GenomeLocProcessingTracker {
|
||||||
protected NoOpGenomeLocProcessingTracker() {
|
protected NoOpGenomeLocProcessingTracker() {
|
||||||
super(new ClosableReentrantLock()); // todo -- should be lighter weight
|
super(new ClosableReentrantLock(), null); // todo -- should be lighter weight
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
// @Override
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,10 @@ import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
import java.nio.channels.*;
|
import java.nio.channels.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -20,28 +23,136 @@ public class SharedFileThreadSafeLock extends ClosableReentrantLock {
|
||||||
private static Logger logger = Logger.getLogger(SharedFileThreadSafeLock.class);
|
private static Logger logger = Logger.getLogger(SharedFileThreadSafeLock.class);
|
||||||
private static final boolean DEBUG = false;
|
private static final boolean DEBUG = false;
|
||||||
|
|
||||||
|
// 100 seconds of trying -> failure
|
||||||
|
private static final int DEFAULT_N_TRIES = 1000;
|
||||||
|
private static final long DEFAULT_MILLISECONDS_PER_TRY = 100;
|
||||||
|
|
||||||
|
/** The file we are locking */
|
||||||
|
private final File file;
|
||||||
|
|
||||||
/** The file lock itself that guards the file */
|
/** The file lock itself that guards the file */
|
||||||
FileLock fileLock;
|
FileLock fileLock;
|
||||||
|
|
||||||
/** the channel object that 'owns' the file lock, and we use to request the lock */
|
/** the channel object that 'owns' the file lock, and we use to request the lock */
|
||||||
FileChannel channel;
|
FileChannel channel;
|
||||||
int fileLockReentrantCounter = 0;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a SharedFileThreadSafeLock object locking the file associated with channel
|
* A counter that indicates the number of 'locks' on this file.
|
||||||
* @param channel
|
* If locks == 2, then two unlocks are required
|
||||||
|
* before any resources are freed.
|
||||||
*/
|
*/
|
||||||
public SharedFileThreadSafeLock(FileChannel channel) {
|
int fileLockReentrantCounter = 0;
|
||||||
|
|
||||||
|
// type of locking
|
||||||
|
private final boolean blockOnLock;
|
||||||
|
private final int nRetries;
|
||||||
|
private final long milliSecPerTry;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a SharedFileThreadSafeLock object locking the file
|
||||||
|
* @param file
|
||||||
|
*/
|
||||||
|
public SharedFileThreadSafeLock(File file, boolean blockOnLock, int nRetries, long milliSecPerTry) {
|
||||||
super();
|
super();
|
||||||
this.channel = channel;
|
this.file = file;
|
||||||
|
this.blockOnLock = blockOnLock;
|
||||||
|
this.nRetries = nRetries;
|
||||||
|
this.milliSecPerTry = milliSecPerTry;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SharedFileThreadSafeLock(File file, boolean blockOnLock) {
|
||||||
|
this(file, blockOnLock, DEFAULT_N_TRIES, DEFAULT_MILLISECONDS_PER_TRY);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private FileChannel getChannel() {
|
||||||
|
if ( DEBUG ) logger.warn(" Get channel: " + Thread.currentThread().getName() + " channel = " + channel);
|
||||||
|
if ( channel == null ) {
|
||||||
|
try {
|
||||||
|
if ( DEBUG ) logger.warn(" opening channel: " + Thread.currentThread().getName());
|
||||||
|
this.channel = new RandomAccessFile(file, "rw").getChannel();
|
||||||
|
if ( DEBUG ) logger.warn(" opened channel: " + Thread.currentThread().getName());
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(file, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.channel;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void closeChannel() {
|
||||||
|
try {
|
||||||
|
if ( channel != null ) {
|
||||||
|
channel.close();
|
||||||
|
channel = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new UserException("Count not close channel associated with file" + file, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
try {
|
closeChannel();
|
||||||
channel.close();
|
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
|
||||||
throw new UserException("Count not close channel " + channel, e);
|
public boolean ownsLock() {
|
||||||
|
return super.isHeldByCurrentThread() && fileLockReentrantCounter > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// workhorse routines -- acquiring file locks
|
||||||
|
//
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void acquireFileLock() {
|
||||||
|
try {
|
||||||
|
// Precondition -- lock is always null while we don't have a lock
|
||||||
|
if ( fileLock != null )
|
||||||
|
throw new ReviewedStingException("BUG: lock() function called when a lock already is owned!");
|
||||||
|
|
||||||
|
if ( blockOnLock ) {
|
||||||
|
//
|
||||||
|
// blocking code
|
||||||
|
//
|
||||||
|
fileLock = getChannel().lock();
|
||||||
|
} else {
|
||||||
|
//
|
||||||
|
// polling code
|
||||||
|
//
|
||||||
|
int i = 0;
|
||||||
|
for ( ; fileLock == null && i < nRetries; i++ ) {
|
||||||
|
fileLock = getChannel().tryLock();
|
||||||
|
if ( fileLock == null ) {
|
||||||
|
try {
|
||||||
|
//logger.warn("tryLock failed on try " + i + ", waiting " + milliSecPerTry + " millseconds for retry");
|
||||||
|
Thread.sleep(milliSecPerTry);
|
||||||
|
} catch ( InterruptedException e ) {
|
||||||
|
throw new UserException("SharedFileThreadSafeLock interrupted during wait for file lock", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( i > 1 ) logger.warn("tryLock required " + i + " tries before completing, waited " + i * milliSecPerTry + " millseconds");
|
||||||
|
|
||||||
|
if ( fileLock == null ) {
|
||||||
|
// filelock == null -> we never managed to acquire the lock!
|
||||||
|
throw new UserException("SharedFileThreadSafeLock failed to obtain the lock after " + nRetries + " attempts");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( DEBUG ) logger.warn(" Have filelock: " + Thread.currentThread().getName());
|
||||||
|
} catch (ClosedChannelException e) {
|
||||||
|
throw new ReviewedStingException("Unable to lock file because the file channel is closed. " + file, e);
|
||||||
|
} catch (FileLockInterruptionException e) {
|
||||||
|
throw new ReviewedStingException("File lock interrupted", e);
|
||||||
|
} catch (NonWritableChannelException e) {
|
||||||
|
throw new ReviewedStingException("File channel not writable", e);
|
||||||
|
} catch (OverlappingFileLockException e) {
|
||||||
|
// this only happens when multiple threads are running, and one is waiting
|
||||||
|
// for the lock above and we come here.
|
||||||
|
throw new ReviewedStingException("BUG: Failed to acquire lock, should never happen.");
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ReviewedStingException("Coordination file could not be created because a lock could not be obtained.", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -61,27 +172,9 @@ public class SharedFileThreadSafeLock extends ClosableReentrantLock {
|
||||||
} else {
|
} else {
|
||||||
super.lock();
|
super.lock();
|
||||||
if ( DEBUG ) logger.warn(" Have thread-lock, going for filelock: " + Thread.currentThread().getName());
|
if ( DEBUG ) logger.warn(" Have thread-lock, going for filelock: " + Thread.currentThread().getName());
|
||||||
try {
|
|
||||||
// Precondition -- lock is always null while we don't have a lock
|
|
||||||
if ( fileLock != null )
|
|
||||||
throw new ReviewedStingException("BUG: lock() function called when a lock already is owned!");
|
|
||||||
if ( fileLockReentrantCounter == 0 )
|
if ( fileLockReentrantCounter == 0 )
|
||||||
fileLock = channel.lock();
|
acquireFileLock();
|
||||||
fileLockReentrantCounter++;
|
fileLockReentrantCounter++;
|
||||||
if ( DEBUG ) logger.warn(" Have filelock: " + Thread.currentThread().getName());
|
|
||||||
} catch (ClosedChannelException e) {
|
|
||||||
throw new ReviewedStingException("Unable to lock file because the file channel is closed. " + channel, e);
|
|
||||||
} catch (FileLockInterruptionException e) {
|
|
||||||
throw new ReviewedStingException("File lock interrupted", e);
|
|
||||||
} catch (NonWritableChannelException e) {
|
|
||||||
throw new ReviewedStingException("File channel not writable", e);
|
|
||||||
} catch (OverlappingFileLockException e) {
|
|
||||||
// this only happens when multiple threads are running, and one is waiting
|
|
||||||
// for the lock above and we come here.
|
|
||||||
throw new ReviewedStingException("BUG: Failed to acquire lock, should never happen.");
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new ReviewedStingException("Coordination file could not be created because a lock could not be obtained.", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -97,13 +190,14 @@ public class SharedFileThreadSafeLock extends ClosableReentrantLock {
|
||||||
|
|
||||||
if ( DEBUG ) logger.warn(" going to release filelock: " + Thread.currentThread().getName());
|
if ( DEBUG ) logger.warn(" going to release filelock: " + Thread.currentThread().getName());
|
||||||
fileLock.release();
|
fileLock.release();
|
||||||
|
closeChannel();
|
||||||
fileLock = null;
|
fileLock = null;
|
||||||
if ( DEBUG ) logger.warn(" released filelock: " + Thread.currentThread().getName());
|
if ( DEBUG ) logger.warn(" released filelock: " + Thread.currentThread().getName());
|
||||||
} else {
|
} else {
|
||||||
if ( DEBUG ) logger.warn(" skipping filelock release, reenterring unlock via multiple threads " + Thread.currentThread().getName());
|
if ( DEBUG ) logger.warn(" skipping filelock release, reenterring unlock via multiple threads " + Thread.currentThread().getName());
|
||||||
}
|
}
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
throw new ReviewedStingException("Could not free lock on file " + channel, e);
|
throw new ReviewedStingException("Could not free lock on file " + file, e);
|
||||||
} finally {
|
} finally {
|
||||||
if ( DEBUG ) logger.warn(" going to release threadlock: " + Thread.currentThread().getName());
|
if ( DEBUG ) logger.warn(" going to release threadlock: " + Thread.currentThread().getName());
|
||||||
super.unlock();
|
super.unlock();
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.threading;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -15,7 +16,11 @@ public class SharedMemoryGenomeLocProcessingTracker extends GenomeLocProcessingT
|
||||||
private List<ProcessingLoc> newPLocs = new ArrayList<ProcessingLoc>();
|
private List<ProcessingLoc> newPLocs = new ArrayList<ProcessingLoc>();
|
||||||
|
|
||||||
protected SharedMemoryGenomeLocProcessingTracker(ClosableReentrantLock lock) {
|
protected SharedMemoryGenomeLocProcessingTracker(ClosableReentrantLock lock) {
|
||||||
super(lock);
|
super(lock, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SharedMemoryGenomeLocProcessingTracker(ClosableReentrantLock lock, PrintStream status) {
|
||||||
|
super(lock, status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -96,8 +96,6 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
List<TestTarget> params = new ArrayList<TestTarget>();
|
List<TestTarget> params = new ArrayList<TestTarget>();
|
||||||
|
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
// for ( int nShard : Arrays.asList(10,100,1000) ) {
|
|
||||||
// for ( int shardSize : Arrays.asList(10) ) {
|
|
||||||
for ( int nShard : nShards ) {
|
for ( int nShard : nShards ) {
|
||||||
for ( int shardSize : shardSizes ) {
|
for ( int shardSize : shardSizes ) {
|
||||||
// shared mem -- canonical implementation
|
// shared mem -- canonical implementation
|
||||||
|
|
@ -108,7 +106,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
|
|
||||||
final File file1 = new File(String.format("%s_ThreadSafeFileBacked_%d_%d", FILE_ROOT, counter++, nShard, shardSize));
|
final File file1 = new File(String.format("%s_ThreadSafeFileBacked_%d_%d", FILE_ROOT, counter++, nShard, shardSize));
|
||||||
params.add(new TestTarget("ThreadSafeFileBacked", nShard, shardSize) {
|
params.add(new TestTarget("ThreadSafeFileBacked", nShard, shardSize) {
|
||||||
GenomeLocProcessingTracker tracker = GenomeLocProcessingTracker.createFileBackedThreaded(file1, genomeLocParser);
|
GenomeLocProcessingTracker tracker = GenomeLocProcessingTracker.createFileBackedThreaded(file1, genomeLocParser, null);
|
||||||
public GenomeLocProcessingTracker getTracker() { return tracker; }
|
public GenomeLocProcessingTracker getTracker() { return tracker; }
|
||||||
public void init() {
|
public void init() {
|
||||||
if ( file1.exists() )
|
if ( file1.exists() )
|
||||||
|
|
@ -116,9 +114,10 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
final File file2 = new File(String.format("%s_ThreadSafeFileLockingFileBacked_%d_%d", FILE_ROOT, counter++, nShard, shardSize));
|
for ( final boolean blocking : Arrays.asList(true, false) ) {
|
||||||
params.add(new TestTarget("ThreadSafeFileLockingFileBacked", nShard, shardSize) {
|
final File file2 = new File(String.format("%s_ThreadSafeFileLockingFile_blocking%b_%d_%d", FILE_ROOT, blocking, counter++, nShard, shardSize));
|
||||||
GenomeLocProcessingTracker tracker = GenomeLocProcessingTracker.createFileBackedDistributed(file2, genomeLocParser);
|
params.add(new TestTarget("ThreadSafeFileLockingFileBackedBlocking" + blocking, nShard, shardSize) {
|
||||||
|
GenomeLocProcessingTracker tracker = GenomeLocProcessingTracker.createFileBackedDistributed(file2, genomeLocParser, blocking, null);
|
||||||
public GenomeLocProcessingTracker getTracker() { return tracker; }
|
public GenomeLocProcessingTracker getTracker() { return tracker; }
|
||||||
public void init() {
|
public void init() {
|
||||||
if ( file2.exists() )
|
if ( file2.exists() )
|
||||||
|
|
@ -127,6 +126,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
List<Object[]> params2 = new ArrayList<Object[]>();
|
List<Object[]> params2 = new ArrayList<Object[]>();
|
||||||
for ( TestTarget x : params ) params2.add(new Object[]{x});
|
for ( TestTarget x : params ) params2.add(new Object[]{x});
|
||||||
|
|
@ -149,7 +149,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(chr1, start, start +1);
|
GenomeLoc loc = genomeLocParser.createGenomeLoc(chr1, start, start +1);
|
||||||
ProcessingLoc ploc = tracker.claimOwnership(loc, NAME_ONE);
|
ProcessingLoc ploc = tracker.claimOwnership(loc, NAME_ONE);
|
||||||
Assert.assertTrue(ploc.isOwnedBy(NAME_ONE));
|
Assert.assertTrue(ploc.isOwnedBy(NAME_ONE));
|
||||||
Assert.assertEquals(tracker.getProcessingLocs().size(), 0);
|
Assert.assertEquals(tracker.getProcessingLocs(NAME_ONE).size(), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -164,8 +164,8 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
for ( GenomeLoc shard : shards ) {
|
for ( GenomeLoc shard : shards ) {
|
||||||
counter++;
|
counter++;
|
||||||
|
|
||||||
Assert.assertNull(tracker.findOwner(shard));
|
Assert.assertNull(tracker.findOwner(shard, NAME_ONE));
|
||||||
Assert.assertFalse(tracker.locIsOwned(shard));
|
Assert.assertFalse(tracker.locIsOwned(shard, NAME_ONE));
|
||||||
|
|
||||||
ProcessingLoc proc = tracker.claimOwnership(shard,NAME_ONE);
|
ProcessingLoc proc = tracker.claimOwnership(shard,NAME_ONE);
|
||||||
Assert.assertNotNull(proc);
|
Assert.assertNotNull(proc);
|
||||||
|
|
@ -173,10 +173,10 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
Assert.assertNotNull(proc.getOwner());
|
Assert.assertNotNull(proc.getOwner());
|
||||||
Assert.assertEquals(proc.getLocation(), shard);
|
Assert.assertEquals(proc.getLocation(), shard);
|
||||||
Assert.assertEquals(proc.getOwner(), NAME_ONE);
|
Assert.assertEquals(proc.getOwner(), NAME_ONE);
|
||||||
Assert.assertEquals(tracker.findOwner(shard), proc);
|
Assert.assertEquals(tracker.findOwner(shard, NAME_ONE), proc);
|
||||||
Assert.assertTrue(tracker.locIsOwned(shard));
|
Assert.assertTrue(tracker.locIsOwned(shard, NAME_ONE));
|
||||||
Assert.assertNotNull(tracker.getProcessingLocs());
|
Assert.assertNotNull(tracker.getProcessingLocs(NAME_ONE));
|
||||||
Assert.assertEquals(tracker.getProcessingLocs().size(), counter);
|
Assert.assertEquals(tracker.getProcessingLocs(NAME_ONE).size(), counter);
|
||||||
|
|
||||||
ProcessingLoc badClaimAttempt = tracker.claimOwnership(shard,NAME_TWO);
|
ProcessingLoc badClaimAttempt = tracker.claimOwnership(shard,NAME_TWO);
|
||||||
Assert.assertFalse(badClaimAttempt.getOwner().equals(NAME_TWO));
|
Assert.assertFalse(badClaimAttempt.getOwner().equals(NAME_TWO));
|
||||||
|
|
@ -211,7 +211,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(nFound, toFind.size(), "Didn't find all of the available shards");
|
Assert.assertEquals(nFound, toFind.size(), "Didn't find all of the available shards");
|
||||||
} else {
|
} else {
|
||||||
nFound++;
|
nFound++;
|
||||||
ProcessingLoc proc = tracker.findOwner(shard);
|
ProcessingLoc proc = tracker.findOwner(shard, NAME_ONE);
|
||||||
|
|
||||||
Assert.assertTrue(proc.isOwnedBy(NAME_ONE));
|
Assert.assertTrue(proc.isOwnedBy(NAME_ONE));
|
||||||
Assert.assertTrue(! markedShards.contains(shard), "Ran process was already marked!");
|
Assert.assertTrue(! markedShards.contains(shard), "Ran process was already marked!");
|
||||||
|
|
@ -246,7 +246,7 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(markedShards.contains(shard), "Unran process wasn't marked");
|
Assert.assertTrue(markedShards.contains(shard), "Unran process wasn't marked");
|
||||||
|
|
||||||
if ( ! markedShards.contains(shard) ) {
|
if ( ! markedShards.contains(shard) ) {
|
||||||
Assert.assertEquals(tracker.findOwner(shard), proc);
|
Assert.assertEquals(tracker.findOwner(shard, NAME_ONE), proc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -357,12 +357,12 @@ public class GenomeLocProcessingTrackerUnitTest extends BaseTest {
|
||||||
assertAllThreadsFinished(results);
|
assertAllThreadsFinished(results);
|
||||||
|
|
||||||
// we ran everything
|
// we ran everything
|
||||||
Assert.assertEquals(tracker.getProcessingLocs().size(), shards.size(), "Not all shards were run");
|
Assert.assertEquals(tracker.getProcessingLocs(NAME_ONE).size(), shards.size(), "Not all shards were run");
|
||||||
|
|
||||||
for ( GenomeLoc shard : shards ) {
|
for ( GenomeLoc shard : shards ) {
|
||||||
Assert.assertTrue(tracker.locIsOwned(shard), "Unowned shard");
|
Assert.assertTrue(tracker.locIsOwned(shard, NAME_ONE), "Unowned shard");
|
||||||
|
|
||||||
ProcessingLoc proc = tracker.findOwner(shard);
|
ProcessingLoc proc = tracker.findOwner(shard, NAME_ONE);
|
||||||
Assert.assertNotNull(proc, "Proc was null");
|
Assert.assertNotNull(proc, "Proc was null");
|
||||||
|
|
||||||
Assert.assertNotNull(proc.getOwner(), "Owner was null");
|
Assert.assertNotNull(proc.getOwner(), "Owner was null");
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/tcsh
|
#!/bin/tcsh
|
||||||
|
|
||||||
setenv HERE "java tribble"
|
setenv HERE "java tribble scala analysis"
|
||||||
setenv THERE \~/dev/GenomeAnalysisTKFromLaptop/trunk
|
setenv THERE \~/dev/GenomeAnalysisTKFromLaptop/trunk
|
||||||
|
|
||||||
rsync -e ssh -aCvz $HERE depristo@gsa1:$THERE
|
rsync -e ssh -aCvz $HERE depristo@gsa1:$THERE
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue