Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
d1aa5204d7
2
ivy.xml
2
ivy.xml
|
|
@ -72,7 +72,7 @@
|
|||
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
||||
|
||||
<!-- Dependencies for amazon.com S3 support -->
|
||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
|
||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
|
||||
|
||||
<!-- Dependencies for GridEngine -->
|
||||
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
|
||||
|
|
|
|||
|
|
@ -264,22 +264,8 @@ public class GATKRunReport {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens the destination file and writes a gzipped version of the XML report there.
|
||||
*
|
||||
* @param destination
|
||||
* @throws IOException
|
||||
*/
|
||||
private void postReportToFile(File destination) throws IOException {
|
||||
BufferedOutputStream out =
|
||||
new BufferedOutputStream(
|
||||
new GZIPOutputStream(
|
||||
new FileOutputStream(destination)));
|
||||
try {
|
||||
postReportToStream(out);
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
private final String getKey() {
|
||||
return getID() + ".report.xml.gz";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -288,16 +274,21 @@ public class GATKRunReport {
|
|||
* That is, postReport() is guarenteed not to fail for any reason.
|
||||
*/
|
||||
private File postReportToLocalDisk(File rootDir) {
|
||||
String filename = getID() + ".report.xml.gz";
|
||||
File file = new File(rootDir, filename);
|
||||
final String filename = getKey();
|
||||
final File destination = new File(rootDir, filename);
|
||||
|
||||
try {
|
||||
postReportToFile(file);
|
||||
logger.debug("Wrote report to " + file);
|
||||
return file;
|
||||
final BufferedOutputStream out = new BufferedOutputStream(
|
||||
new GZIPOutputStream(
|
||||
new FileOutputStream(destination)));
|
||||
postReportToStream(out);
|
||||
out.close();
|
||||
logger.debug("Wrote report to " + destination);
|
||||
return destination;
|
||||
} catch ( Exception e ) {
|
||||
// we catch everything, and no matter what eat the error
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
file.delete();
|
||||
destination.delete();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -305,42 +296,46 @@ public class GATKRunReport {
|
|||
private void postReportToAWSS3() {
|
||||
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
|
||||
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
|
||||
File localFile = postReportToLocalDisk(new File("./"));
|
||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
||||
if ( localFile != null ) { // we succeeded in creating the local file
|
||||
localFile.deleteOnExit();
|
||||
try {
|
||||
// stop us from printing the annoying, and meaningless, mime types warning
|
||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||
mimeTypeLogger.setLevel(Level.FATAL);
|
||||
final String key = getKey();
|
||||
logger.debug("Generating GATK report to AWS S3 with key " + key);
|
||||
try {
|
||||
// create an byte output stream so we can capture the output as a byte[]
|
||||
final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096);
|
||||
final OutputStream outputStream = new GZIPOutputStream(byteStream);
|
||||
postReportToStream(outputStream);
|
||||
outputStream.close();
|
||||
final byte[] report = byteStream.toByteArray();
|
||||
|
||||
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
|
||||
// are stored in an AWSCredentials object:
|
||||
// stop us from printing the annoying, and meaningless, mime types warning
|
||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||
mimeTypeLogger.setLevel(Level.FATAL);
|
||||
|
||||
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
|
||||
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
|
||||
String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user
|
||||
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
|
||||
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
|
||||
// are stored in an AWSCredentials object:
|
||||
|
||||
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
|
||||
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
|
||||
S3Service s3Service = new RestS3Service(awsCredentials);
|
||||
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
|
||||
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
|
||||
String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user
|
||||
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
|
||||
|
||||
// Create an S3Object based on a file, with Content-Length set automatically and
|
||||
// Content-Type set based on the file's extension (using the Mimetypes utility class)
|
||||
S3Object fileObject = new S3Object(localFile);
|
||||
//logger.info("Created S3Object" + fileObject);
|
||||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||
logger.debug("Uploaded to AWS: " + s3Object);
|
||||
logger.info("Uploaded run statistics report to AWS S3");
|
||||
} catch ( S3ServiceException e ) {
|
||||
exceptDuringRunReport("S3 exception occurred", e);
|
||||
} catch ( NoSuchAlgorithmException e ) {
|
||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||
} catch ( IOException e ) {
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
}
|
||||
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
|
||||
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
|
||||
S3Service s3Service = new RestS3Service(awsCredentials);
|
||||
|
||||
// Create an S3Object based on a file, with Content-Length set automatically and
|
||||
// Content-Type set based on the file's extension (using the Mimetypes utility class)
|
||||
S3Object fileObject = new S3Object(key, report);
|
||||
//logger.info("Created S3Object" + fileObject);
|
||||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||
logger.debug("Uploaded to AWS: " + s3Object);
|
||||
logger.info("Uploaded run statistics report to AWS S3");
|
||||
} catch ( S3ServiceException e ) {
|
||||
exceptDuringRunReport("S3 exception occurred", e);
|
||||
} catch ( NoSuchAlgorithmException e ) {
|
||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||
} catch ( IOException e ) {
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -63,25 +62,26 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
referenceOrderedDataView = (RodLocusView)locusView;
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
GenomeLoc location = locus.getLocation();
|
||||
if(prevLoc != null) {
|
||||
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
|
||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||
if( initialIntervals.overlaps( fakeLoc ) ) {
|
||||
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
|
||||
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
||||
isActiveList.add( isActiveProb );
|
||||
if( firstIsActiveStart == null ) {
|
||||
firstIsActiveStart = fakeLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
if ( locus.hasExtendedEventPileup() ) {
|
||||
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
||||
// associated with the current site), we need to update the location. The updated location still starts
|
||||
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
||||
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
||||
|
||||
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
||||
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
||||
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
|
||||
// the next call will not do anything to the view:
|
||||
referenceView.expandBoundsToAccomodateLoc(location);
|
||||
}
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
|
@ -95,7 +95,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
: ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
|
||||
isActiveList.add( isActiveProb );
|
||||
if( firstIsActiveStart == null ) {
|
||||
firstIsActiveStart = locus.getLocation();
|
||||
firstIsActiveStart = location;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -118,6 +118,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
|
||||
}
|
||||
}
|
||||
prevLoc = location;
|
||||
printProgress(dataProvider.getShard(), locus.getLocation());
|
||||
}
|
||||
|
||||
|
|
@ -230,7 +231,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
final int MAX_ACTIVE_REGION = 200;
|
||||
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||
double maxVal = 0;
|
||||
for( int jjj = Math.max( 0, iii-FILTER_SIZE); jjj < Math.min( activeList.size(), iii+FILTER_SIZE); jjj++ ) {
|
||||
for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE); jjj++ ) {
|
||||
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
|
||||
}
|
||||
filteredProbArray[iii] = maxVal;
|
||||
|
|
@ -241,14 +242,18 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
for(int iii = 1; iii < filteredProbArray.length; iii++ ) {
|
||||
final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD;
|
||||
if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
|
||||
returnList.add( new ActiveRegion(
|
||||
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
|
||||
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
curStatus = thisStatus;
|
||||
curStart = iii;
|
||||
}
|
||||
}
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
|
||||
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
if( curStart != filteredProbArray.length-1 ) {
|
||||
returnList.add( new ActiveRegion(
|
||||
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
|
||||
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
}
|
||||
return returnList;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ public class QualityUtils {
|
|||
|
||||
private static double qualToErrorProbCache[] = new double[256];
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i);
|
||||
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -44,15 +44,15 @@ public class QualityUtils {
|
|||
* Convert a quality score to a probability of error. This is the Phred-style
|
||||
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
||||
*
|
||||
* @param qual a quality score (0-40)
|
||||
* @return a probability (0.0-1.0)
|
||||
* @param qual a quality score (0 - 255)
|
||||
* @return a probability (0.0 - 1.0)
|
||||
*/
|
||||
static public double qualToErrorProbRaw(byte qual) {
|
||||
static private double qualToErrorProbRaw(int qual) {
|
||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||
}
|
||||
|
||||
static public double qualToErrorProb(byte qual) {
|
||||
return qualToErrorProbCache[qual];
|
||||
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue