Scripts to create the GATK IAM user and give him/her rights to PutObject (and only PutObject) into the S3 storage instance. Updated the GATKRunReport to now upload using the GATK user, not mark@depristo.com. Running with -et AWS_S3 now sends run reports up to the Amazon S3 cloud. Going to request that a few external users try this option so we can see it running at scale. I'm sure S3 can handle a few hundred thousand 1 KB uploads per day, though.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5132 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-01-31 03:48:33 +00:00
parent e26da9b047
commit b5d1aab8dc
6 changed files with 99 additions and 49 deletions

View File

@ -0,0 +1,12 @@
{
"Statement": [
{
"Sid": "Stmt1296439478068",
"Action": [
"s3:PutObject"
],
"Effect": "Allow",
"Resource": "arn:aws:s3:::GATK_Run_Reports/*"
}
]
}

View File

@ -0,0 +1,2 @@
AKIAJXU7VIHBPDW4TDSQ
uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA

View File

@ -0,0 +1,8 @@
{
"Statement":[{
"Effect":"Allow",
"Action":"*",
"Resource":"*"
}
]
}

View File

@ -0,0 +1,45 @@
#!/bin/tcsh
#
# Administers the AWS IAM "GATK" user used for uploading GATK run reports.
# Each section below is enabled or disabled by the true/false flags set here;
# edit the flags before running.
#
# Arguments:
#   $1  current access key of the GATK user; required only when
#       UPDATE_USER_KEYS is true (it is the key that gets deleted).
#
# Policy files (GroupPolicy.txt, GATKPolicy.txt) are referenced by relative
# path, so run this script from the directory that contains them.
#
# download CLI tools
# http://aws.amazon.com/developertools/AWS-Identity-and-Access-Management/4143

setenv JAVA_HOME /usr/
setenv AWS_IAM_HOME ~/Downloads/IAMCli-1.1.0
setenv PATH $AWS_IAM_HOME/bin:$PATH
setenv AWS_CREDENTIAL_FILE /Users/depristo/Desktop/broadLocal/GATK/trunk/account-key

# Section switches
setenv CREATE_GROUPS false
setenv CREATE_GATK_USER false
setenv UPDATE_USER_KEYS false
setenv UPDATE_USER_POLICY true

# Create the administrators group:
# we aren't actually using this, in fact
if ( "$CREATE_GROUPS" == "true" ) then
    iam-groupcreate -g Admins
    iam-grouplistbypath
    iam-groupuploadpolicy -g Admins -p AdminsGroupPolicy -f GroupPolicy.txt
    iam-grouplistpolicies -g Admins
endif

# Create the GATK user -- set CREATE_GATK_USER to true if the GATK user needs
# to be created from scratch. The command output is saved to GATK_cred.txt.
if ( "$CREATE_GATK_USER" == "true" ) then
    iam-usercreate -u GATK -k -v > GATK_cred.txt
endif

# the user access and secret keys are in the GATK source file GATKRunReport.java
# and must be updated to be the most current ones
if ( "$UPDATE_USER_KEYS" == "true" ) then
    # Refuse to continue without the key to delete; otherwise -k would be
    # passed to iam-userdelkey with no value.
    if ( $#argv < 1 ) then
        echo "UPDATE_USER_KEYS requires the current GATK access key as the first argument"
        exit 1
    endif
    iam-userdelkey -u GATK -k "$1" # $1 -> current access key
    iam-useraddkey -u GATK > GATK_cred.txt
    cat GATK_cred.txt
endif

echo "GATK user policies"
if ( "$UPDATE_USER_POLICY" == "true" ) then
    # Replace the policy: remove the existing one, then upload the current file.
    echo "Deleting policy"
    iam-userdelpolicy -u GATK -p GATKRunReportUploading
    iam-useruploadpolicy -u GATK -p GATKRunReportUploading -f GATKPolicy.txt
endif

# Always finish by listing the policies attached to the GATK user.
iam-userlistpolicies -u GATK -v

View File

@ -86,11 +86,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@Element(required = false)
@Argument(fullName = "S3SecretKey", shortName = "s3sk", doc="Secret key to be used for AWS S3 interactions", required = false)
public String S3SecretKey = null;
@ElementList(required = false)
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false)
public List<String> readFilters = new ArrayList<String>();
@ -400,10 +395,6 @@ public class GATKArgumentCollection {
if (other.phoneHomeType != this.phoneHomeType) {
return false;
}
if ((other.S3SecretKey == null && this.S3SecretKey != null) ||
(other.S3SecretKey != null && !other.S3SecretKey.equals(this.S3SecretKey))) {
return false;
}
if (BTIMergeRule != other.BTIMergeRule)
return false;

View File

@ -159,8 +159,6 @@ public class GATKRunReport {
// todo md5 all filenames
// todo size of filenames
private String S3SecretKey = null;
public enum PhoneHomeOption {
NO_ET,
STANDARD,
@ -228,8 +226,6 @@ public class GATKRunReport {
// if there was an exception, capture it
this.mException = e == null ? null : new ExceptionToXML(e);
this.S3SecretKey = engine.getArguments().S3SecretKey;
}
public String getID() {
@ -331,51 +327,47 @@ public class GATKRunReport {
private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
if ( S3SecretKey == null )
exceptDuringRunReport("Cannot upload run reports to AWS S3 without providing a secret key on the command line");
else {
this.hostName = resolveHostname(); // we want to fill in the host name
File localFile = postReportToLocalDisk(new File("./"));
logger.info("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) {
try {
// we succeeded in creating the local file
this.hostName = resolveHostname(); // we want to fill in the host name
File localFile = postReportToLocalDisk(new File("./"));
logger.info("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) {
try {
// we succeeded in creating the local file
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
// are stored in an AWSCredentials object:
String awsAccessKey = "AKIAJQQEIHTAHSM333EQ";
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, S3SecretKey);
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
// are stored in an AWSCredentials object:
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Service s3Service = new RestS3Service(awsCredentials);
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// grab the reports bucket
S3Bucket reportsBucket = s3Service.getBucket(REPORT_BUCKET_NAME);
logger.info("Uploading to bucket: " + reportsBucket);
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Service s3Service = new RestS3Service(awsCredentials);
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(localFile);
logger.info("Created S3Object" + fileObject);
logger.info("Uploading " + localFile + " to AWS bucket");
s3Service.putObject(reportsBucket, fileObject);
//logger.info("Done. File hash value: " + fileObject.getMd5HashAsHex());
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
} finally {
localFile.delete();
}
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(localFile);
logger.info("Created S3Object" + fileObject);
logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.info("Uploaded: " + s3Object);
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
} finally {
localFile.delete();
}
}
}
private void exceptDuringRunReport(String msg, Throwable e) {
logger.warn("An occurred during GATK run reporting [everything is fine, but no report could be generated]. Message is: " + msg + ". Error message is: " + e.getMessage() + ". Stack track follows" + e.getStackTrace());
logger.warn("An occurred during GATK run reporting [everything is fine, but no report could be generated]. Message is: " + msg + ". Error message is: " + e.getMessage() + ". Stack track follows");
e.printStackTrace();
}
private void exceptDuringRunReport(String msg) {