S3 upload now directly creates the XML report in memory and puts that in S3

-- This is a partial fix for the problem with uploading S3 logs that Mauricio reported. The problem there is that java.io.tmpdir is not accessible (the network just hangs), and the S3 upload then fails because the underlying system uses tmpdir for caching, etc. As far as I can tell there's no way around this bug -- you cannot override java.io.tmpdir programmatically, and even if I could, what value would we use? The only solution, it seems to me, is to detect that tmpdir is hanging (how?!) and fail with a meaningful error.
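On the "detect that tmpdir is hanging (how?!)" question: one possible approach -- not part of this commit, and the names and timeout below are purely illustrative -- is to probe java.io.tmpdir from a worker thread and give up after a deadline. A hung NFS mount blocks the write rather than throwing, so the timeout on Future.get() is what actually detects the problem:

    import java.io.File;
    import java.util.concurrent.*;

    // Hypothetical probe: attempt a tiny write to java.io.tmpdir on a worker
    // thread and report failure if it does not complete within the timeout.
    public class TmpDirProbe {
        public static boolean isTmpDirResponsive(final long timeoutMillis) {
            final ExecutorService executor = Executors.newSingleThreadExecutor();
            try {
                final Future<Boolean> probe = executor.submit(new Callable<Boolean>() {
                    public Boolean call() throws Exception {
                        // createTempFile touches the filesystem; on a hung mount
                        // this call blocks instead of throwing
                        final File f = File.createTempFile("gatk-probe", ".tmp");
                        f.delete();
                        return true;
                    }
                });
                return probe.get(timeoutMillis, TimeUnit.MILLISECONDS);
            } catch ( Exception e ) { // TimeoutException, wrapped IOException, etc.
                return false;
            } finally {
                executor.shutdownNow(); // abandon the worker if it is stuck in the write
            }
        }
    }

A caller such as postReportToAWSS3() could run this check first and route to exceptDuringRunReport() with a meaningful message when it returns false.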
Mark DePristo 2012-01-29 15:14:58 -05:00
parent 0e17cbbce9
commit 3164c8dee5
2 changed files with 50 additions and 55 deletions


@@ -72,7 +72,7 @@
     <dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
     <!-- Dependencies for amazon.com S3 support -->
-    <dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
+    <dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
     <!-- Dependencies for GridEngine -->
     <dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>


@@ -264,22 +264,8 @@ public class GATKRunReport {
         }
     }

-    /**
-     * Opens the destination file and writes a gzipped version of the XML report there.
-     *
-     * @param destination
-     * @throws IOException
-     */
-    private void postReportToFile(File destination) throws IOException {
-        BufferedOutputStream out =
-                new BufferedOutputStream(
-                        new GZIPOutputStream(
-                                new FileOutputStream(destination)));
-        try {
-            postReportToStream(out);
-        } finally {
-            out.close();
-        }
-    }
+    private final String getKey() {
+        return getID() + ".report.xml.gz";
+    }

     /**
@@ -288,16 +274,21 @@ public class GATKRunReport {
      * That is, postReport() is guaranteed not to fail for any reason.
      */
     private File postReportToLocalDisk(File rootDir) {
-        String filename = getID() + ".report.xml.gz";
-        File file = new File(rootDir, filename);
+        final String filename = getKey();
+        final File destination = new File(rootDir, filename);
         try {
-            postReportToFile(file);
-            logger.debug("Wrote report to " + file);
-            return file;
+            final BufferedOutputStream out = new BufferedOutputStream(
+                    new GZIPOutputStream(
+                            new FileOutputStream(destination)));
+            postReportToStream(out);
+            out.close();
+            logger.debug("Wrote report to " + destination);
+            return destination;
         } catch ( Exception e ) {
             // we catch everything, and no matter what eat the error
             exceptDuringRunReport("Couldn't read report file", e);
-            file.delete();
+            destination.delete();
             return null;
         }
     }
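Side note on the inlined write: without the finally block that the deleted postReportToFile() had, an exception from postReportToStream() leaves the FileOutputStream open until garbage collection. A sketch of the same method with the close restored to a finally block (helpers as in the patch; not part of this commit):

    private File postReportToLocalDisk(final File rootDir) {
        final File destination = new File(rootDir, getKey());
        try {
            final BufferedOutputStream out = new BufferedOutputStream(
                    new GZIPOutputStream(
                            new FileOutputStream(destination)));
            try {
                postReportToStream(out);
            } finally {
                out.close(); // runs even if postReportToStream throws
            }
            logger.debug("Wrote report to " + destination);
            return destination;
        } catch ( Exception e ) {
            // we catch everything, and no matter what eat the error
            exceptDuringRunReport("Couldn't read report file", e);
            destination.delete();
            return null;
        }
    }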
@@ -305,42 +296,46 @@ public class GATKRunReport {
     private void postReportToAWSS3() {
         // modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
         this.hostName = Utils.resolveHostname(); // we want to fill in the host name
-        File localFile = postReportToLocalDisk(new File("./"));
-        logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
-        if ( localFile != null ) { // we succeeded in creating the local file
-            localFile.deleteOnExit();
-            try {
-                // stop us from printing the annoying, and meaningless, mime types warning
-                Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
-                mimeTypeLogger.setLevel(Level.FATAL);
-
-                // Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
-                // are stored in an AWSCredentials object:
-
-                // IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
-                String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
-                String awsSecretKey = "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"; // GATK AWS user
-                AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
-
-                // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
-                // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
-                S3Service s3Service = new RestS3Service(awsCredentials);
-
-                // Create an S3Object based on a file, with Content-Length set automatically and
-                // Content-Type set based on the file's extension (using the Mimetypes utility class)
-                S3Object fileObject = new S3Object(localFile);
-                //logger.info("Created S3Object" + fileObject);
-                //logger.info("Uploading " + localFile + " to AWS bucket");
-                S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
-                logger.debug("Uploaded to AWS: " + s3Object);
-                logger.info("Uploaded run statistics report to AWS S3");
-            } catch ( S3ServiceException e ) {
-                exceptDuringRunReport("S3 exception occurred", e);
-            } catch ( NoSuchAlgorithmException e ) {
-                exceptDuringRunReport("Couldn't calculate MD5", e);
-            } catch ( IOException e ) {
-                exceptDuringRunReport("Couldn't read report file", e);
-            }
-        }
+        final String key = getKey();
+        logger.debug("Generating GATK report to AWS S3 with key " + key);
+        try {
+            // create a byte output stream so we can capture the report as a byte[]
+            final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096);
+            final OutputStream outputStream = new GZIPOutputStream(byteStream);
+            postReportToStream(outputStream);
+            outputStream.close();
+            final byte[] report = byteStream.toByteArray();
+
+            // stop us from printing the annoying, and meaningless, mime types warning
+            Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
+            mimeTypeLogger.setLevel(Level.FATAL);
+
+            // Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
+            // are stored in an AWSCredentials object:
+
+            // IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
+            String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
+            String awsSecretKey = "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"; // GATK AWS user
+            AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
+
+            // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
+            // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
+            S3Service s3Service = new RestS3Service(awsCredentials);
+
+            // Create an S3Object based on the in-memory report, with Content-Length and the MD5 hash
+            // computed automatically from the byte array
+            S3Object fileObject = new S3Object(key, report);
+            //logger.info("Created S3Object" + fileObject);
+            S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
+            logger.debug("Uploaded to AWS: " + s3Object);
+            logger.info("Uploaded run statistics report to AWS S3");
+        } catch ( S3ServiceException e ) {
+            exceptDuringRunReport("S3 exception occurred", e);
+        } catch ( NoSuchAlgorithmException e ) {
+            exceptDuringRunReport("Couldn't calculate MD5", e);
+        } catch ( IOException e ) {
+            exceptDuringRunReport("Couldn't read report file", e);
+        }
     }
 }
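Note that the S3Object(String, byte[]) constructor computes Content-Length and the MD5 hash from the array (which is why NoSuchAlgorithmException is still caught), but unlike the File-based constructor it has no file extension to guess a Content-Type from, so the object defaults to application/octet-stream. If the gzip type matters to anything downstream, it could be set explicitly -- a sketch, reusing fileObject from above:

    // assumption: JetS3t defaults in-memory objects to application/octet-stream
    fileObject.setContentType("application/x-gzip");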