Scripts to create the GATK IAM user and give it rights to PutObject (and only PutObject) into the S3 storage instance. Updated the GATKRunReport to now upload using the GATK user, not mark@depristo.com. Running with -et AWS_S3 sends run reports up to the Amazon S3 cloud now. Going to ask a few external users to try this option so we can see it running at scale. I'm sure S3 can handle a few hundred thousand 1 KB uploads per day, though.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5132 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-01-31 03:48:33 +00:00
parent e26da9b047
commit b5d1aab8dc
6 changed files with 99 additions and 49 deletions

View File

@ -0,0 +1,12 @@
{
"Statement": [
{
"Sid": "Stmt1296439478068",
"Action": [
"s3:PutObject"
],
"Effect": "Allow",
"Resource": "arn:aws:s3:::GATK_Run_Reports/*"
}
]
}

View File

@ -0,0 +1,2 @@
AKIAJXU7VIHBPDW4TDSQ
uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA

View File

@ -0,0 +1,8 @@
{
"Statement":[{
"Effect":"Allow",
"Action":"*",
"Resource":"*"
}
]
}

View File

@ -0,0 +1,45 @@
#!/bin/tcsh
#
# Administers the AWS IAM "GATK" user whose credentials are used to upload
# GATK run reports.  Each step below is enabled or disabled by one of the
# true/false switches set near the top of this script.
#
# Usage: <this script> [current-access-key]
#   current-access-key   access key to delete when rotating keys; required
#                        only when UPDATE_USER_KEYS is true
#
# Requires the IAM command line tools:
# http://aws.amazon.com/developertools/AWS-Identity-and-Access-Management/4143

# Locations of Java, the IAM tools, and the account credential file that the
# IAM tools read.  These paths are machine-specific; adjust them before running.
setenv JAVA_HOME /usr/
setenv AWS_IAM_HOME ~/Downloads/IAMCli-1.1.0
setenv PATH ${AWS_IAM_HOME}/bin:${PATH}
setenv AWS_CREDENTIAL_FILE /Users/depristo/Desktop/broadLocal/GATK/trunk/account-key

# Step switches: set to true to run the corresponding section.
setenv CREATE_GROUPS false
setenv CREATE_GATK_USER false
setenv UPDATE_USER_KEYS false
setenv UPDATE_USER_POLICY true

# Create the administrators group and attach GroupPolicy.txt to it.
# We aren't actually using this group, in fact.
if ( "$CREATE_GROUPS" == "true" ) then
    iam-groupcreate -g Admins
    iam-grouplistbypath
    iam-groupuploadpolicy -g Admins -p AdminsGroupPolicy -f GroupPolicy.txt
    iam-grouplistpolicies -g Admins
endif

# Create the GATK user from scratch (set CREATE_GATK_USER to true).
# The command output is captured in GATK_cred.txt -- presumably the new
# key pair, so treat that file as a secret and do not commit it.
if ( "$CREATE_GATK_USER" == "true" ) then
    iam-usercreate -u GATK -k -v > GATK_cred.txt
endif

# Rotate the GATK user's keys: delete the current access key, then add a new one.
# The user access and secret keys are in the GATK source file GATKRunReport.java
# and must be updated to be the most current ones.
if ( "$UPDATE_USER_KEYS" == "true" ) then
    # Without the key to delete, the delete would fail and a second key would
    # still be created, so stop before touching anything.
    if ( $#argv < 1 ) then
        echo "usage: $0 current-access-key   (required when UPDATE_USER_KEYS is true)"
        exit 1
    endif
    iam-userdelkey -u GATK -k "$1" # $1 -> current access key
    iam-useraddkey -u GATK > GATK_cred.txt
    cat GATK_cred.txt
endif

# Replace the GATK user's upload policy with the contents of GATKPolicy.txt.
echo "GATK user policies"
if ( "$UPDATE_USER_POLICY" == "true" ) then
    echo "Deleting policy"
    iam-userdelpolicy -u GATK -p GATKRunReportUploading
    iam-useruploadpolicy -u GATK -p GATKRunReportUploading -f GATKPolicy.txt
endif

# Always finish by listing the policies now attached to the GATK user.
iam-userlistpolicies -u GATK -v

View File

@ -86,11 +86,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false) @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Element(required = false)
@Argument(fullName = "S3SecretKey", shortName = "s3sk", doc="Secret key to be used for AWS S3 interactions", required = false)
public String S3SecretKey = null;
@ElementList(required = false) @ElementList(required = false)
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false) @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false)
public List<String> readFilters = new ArrayList<String>(); public List<String> readFilters = new ArrayList<String>();
@ -400,10 +395,6 @@ public class GATKArgumentCollection {
if (other.phoneHomeType != this.phoneHomeType) { if (other.phoneHomeType != this.phoneHomeType) {
return false; return false;
} }
if ((other.S3SecretKey == null && this.S3SecretKey != null) ||
(other.S3SecretKey != null && !other.S3SecretKey.equals(this.S3SecretKey))) {
return false;
}
if (BTIMergeRule != other.BTIMergeRule) if (BTIMergeRule != other.BTIMergeRule)
return false; return false;

View File

@ -159,8 +159,6 @@ public class GATKRunReport {
// todo md5 all filenames // todo md5 all filenames
// todo size of filenames // todo size of filenames
private String S3SecretKey = null;
public enum PhoneHomeOption { public enum PhoneHomeOption {
NO_ET, NO_ET,
STANDARD, STANDARD,
@ -228,8 +226,6 @@ public class GATKRunReport {
// if there was an exception, capture it // if there was an exception, capture it
this.mException = e == null ? null : new ExceptionToXML(e); this.mException = e == null ? null : new ExceptionToXML(e);
this.S3SecretKey = engine.getArguments().S3SecretKey;
} }
public String getID() { public String getID() {
@ -331,51 +327,47 @@ public class GATKRunReport {
private void postReportToAWSS3() { private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html // modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
if ( S3SecretKey == null ) this.hostName = resolveHostname(); // we want to fill in the host name
exceptDuringRunReport("Cannot upload run reports to AWS S3 without providing a secret key on the command line"); File localFile = postReportToLocalDisk(new File("./"));
else { logger.info("Generating GATK report to AWS S3 based on local file " + localFile);
this.hostName = resolveHostname(); // we want to fill in the host name if ( localFile != null ) {
File localFile = postReportToLocalDisk(new File("./")); try {
logger.info("Generating GATK report to AWS S3 based on local file " + localFile); // we succeeded in creating the local file
if ( localFile != null ) {
try {
// we succeeded in creating the local file
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials // Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
// are stored in an AWSCredentials object: // are stored in an AWSCredentials object:
String awsAccessKey = "AKIAJQQEIHTAHSM333EQ";
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, S3SecretKey);
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP // IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t. String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
S3Service s3Service = new RestS3Service(awsCredentials); String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// grab the reports bucket // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
S3Bucket reportsBucket = s3Service.getBucket(REPORT_BUCKET_NAME); // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
logger.info("Uploading to bucket: " + reportsBucket); S3Service s3Service = new RestS3Service(awsCredentials);
// Create an S3Object based on a file, with Content-Length set automatically and // Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class) // Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(localFile); S3Object fileObject = new S3Object(localFile);
logger.info("Created S3Object" + fileObject); logger.info("Created S3Object" + fileObject);
logger.info("Uploading " + localFile + " to AWS bucket"); logger.info("Uploading " + localFile + " to AWS bucket");
s3Service.putObject(reportsBucket, fileObject); S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
//logger.info("Done. File hash value: " + fileObject.getMd5HashAsHex()); logger.info("Uploaded: " + s3Object);
} catch ( S3ServiceException e ) { } catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e); exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) { } catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e); exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) { } catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e); exceptDuringRunReport("Couldn't read report file", e);
} finally { } finally {
localFile.delete(); localFile.delete();
}
} }
} }
} }
private void exceptDuringRunReport(String msg, Throwable e) { private void exceptDuringRunReport(String msg, Throwable e) {
logger.warn("An occurred during GATK run reporting [everything is fine, but no report could be generated]. Message is: " + msg + ". Error message is: " + e.getMessage() + ". Stack track follows" + e.getStackTrace()); logger.warn("An occurred during GATK run reporting [everything is fine, but no report could be generated]. Message is: " + msg + ". Error message is: " + e.getMessage() + ". Stack track follows");
e.printStackTrace();
} }
private void exceptDuringRunReport(String msg) { private void exceptDuringRunReport(String msg) {