From fc08235ff374e3eba8d326c67e9698f9c9280523 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 27 Jan 2012 15:12:37 -0500 Subject: [PATCH 1/3] Bug fix in active region traversal, locusView.getNext() skips over pileups with zero coverage but still need to count them in the active probability integrator --- .../traversals/TraverseActiveRegions.java | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 83daa4d80..562a6d1d0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -63,25 +62,26 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine extends TraversalEngine maxVal ) { maxVal = activeProbArray[jjj]; } } filteredProbArray[iii] = maxVal; @@ -241,14 +242,18 @@ public class TraverseActiveRegions extends TraversalEngine ACTIVE_PROB_THRESHOLD; if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) { - returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)), + returnList.add( new ActiveRegion( + engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)), curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); curStatus = thisStatus; curStart = iii; } } - returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)), - curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); + if( curStart != filteredProbArray.length-1 ) { + returnList.add( new ActiveRegion( + engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)), + curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); + } return returnList; } } From a9671b73ca0fc50d7c2722727e90d731d38037aa Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Fri, 27 Jan 2012 16:01:30 -0500 Subject: [PATCH 2/3] Fix to permit proper handling of mapping qualities between 128 to 255 (which get converted to byte values of -128 to -1) --- .../org/broadinstitute/sting/utils/QualityUtils.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 19e03a19d..7ec6a74d7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -17,7 +17,7 @@ public class QualityUtils { private static double qualToErrorProbCache[] = new double[256]; static { - for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i); + for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i); } /** @@ -44,15 +44,15 @@ public class QualityUtils { * Convert a quality score to a probability of error. This is the Phred-style * conversion, *not* the Illumina-style conversion (though asymptotically, they're the same). * - * @param qual a quality score (0-40) - * @return a probability (0.0-1.0) + * @param qual a quality score (0 - 255) + * @return a probability (0.0 - 1.0) */ - static public double qualToErrorProbRaw(byte qual) { + static private double qualToErrorProbRaw(int qual) { return Math.pow(10.0, ((double) qual)/-10.0); } static public double qualToErrorProb(byte qual) { - return qualToErrorProbCache[qual]; + return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc. } /** From 3164c8dee57cb84a3c60c38f67e196a7fc25038e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 29 Jan 2012 15:14:58 -0500 Subject: [PATCH 3/3] S3 upload now directly creates the XML report in memory and puts that in S3 -- This is a partial fix for the problem with uploading S3 logs reported by Mauricio. There the problem is that the java.io.tmpdir is not accessible (network just hangs). Because of that the s3 upload fails because the underlying system uses tmpdir for caching, etc. As far as I can tell there's no way around this bug -- you cannot overload the java.io.tmpdir programmatically and even if I could what value would we use? The only solution seems to me is to detect that tmpdir is hanging (how?!) and fail with a meaningful error. --- ivy.xml | 2 +- .../sting/gatk/phonehome/GATKRunReport.java | 103 +++++++++--------- 2 files changed, 50 insertions(+), 55 deletions(-) diff --git a/ivy.xml b/ivy.xml index f7c64aec6..06296c6b4 100644 --- a/ivy.xml +++ b/ivy.xml @@ -72,7 +72,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index f09865537..e8627ef4c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -264,22 +264,8 @@ public class GATKRunReport { } } - /** - * Opens the destination file and writes a gzipped version of the XML report there. - * - * @param destination - * @throws IOException - */ - private void postReportToFile(File destination) throws IOException { - BufferedOutputStream out = - new BufferedOutputStream( - new GZIPOutputStream( - new FileOutputStream(destination))); - try { - postReportToStream(out); - } finally { - out.close(); - } + private final String getKey() { + return getID() + ".report.xml.gz"; } /** @@ -288,16 +274,21 @@ public class GATKRunReport { * That is, postReport() is guarenteed not to fail for any reason. */ private File postReportToLocalDisk(File rootDir) { - String filename = getID() + ".report.xml.gz"; - File file = new File(rootDir, filename); + final String filename = getKey(); + final File destination = new File(rootDir, filename); + try { - postReportToFile(file); - logger.debug("Wrote report to " + file); - return file; + final BufferedOutputStream out = new BufferedOutputStream( + new GZIPOutputStream( + new FileOutputStream(destination))); + postReportToStream(out); + out.close(); + logger.debug("Wrote report to " + destination); + return destination; } catch ( Exception e ) { // we catch everything, and no matter what eat the error exceptDuringRunReport("Couldn't read report file", e); - file.delete(); + destination.delete(); return null; } } @@ -305,42 +296,46 @@ public class GATKRunReport { private void postReportToAWSS3() { // modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html this.hostName = Utils.resolveHostname(); // we want to fill in the host name - File localFile = postReportToLocalDisk(new File("./")); - logger.debug("Generating GATK report to AWS S3 based on local file " + localFile); - if ( localFile != null ) { // we succeeded in creating the local file - localFile.deleteOnExit(); - try { - // stop us from printing the annoying, and meaningless, mime types warning - Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class); - mimeTypeLogger.setLevel(Level.FATAL); + final String key = getKey(); + logger.debug("Generating GATK report to AWS S3 with key " + key); + try { + // create an byte output stream so we can capture the output as a byte[] + final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096); + final OutputStream outputStream = new GZIPOutputStream(byteStream); + postReportToStream(outputStream); + outputStream.close(); + final byte[] report = byteStream.toByteArray(); - // Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials - // are stored in an AWSCredentials object: + // stop us from printing the annoying, and meaningless, mime types warning + Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class); + mimeTypeLogger.setLevel(Level.FATAL); - // IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket - String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user - String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user - AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey); + // Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials + // are stored in an AWSCredentials object: - // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP - // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t. - S3Service s3Service = new RestS3Service(awsCredentials); + // IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket + String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user + String awsSecretKey = "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"; // GATK AWS user + AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey); - // Create an S3Object based on a file, with Content-Length set automatically and - // Content-Type set based on the file's extension (using the Mimetypes utility class) - S3Object fileObject = new S3Object(localFile); - //logger.info("Created S3Object" + fileObject); - //logger.info("Uploading " + localFile + " to AWS bucket"); - S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject); - logger.debug("Uploaded to AWS: " + s3Object); - logger.info("Uploaded run statistics report to AWS S3"); - } catch ( S3ServiceException e ) { - exceptDuringRunReport("S3 exception occurred", e); - } catch ( NoSuchAlgorithmException e ) { - exceptDuringRunReport("Couldn't calculate MD5", e); - } catch ( IOException e ) { - exceptDuringRunReport("Couldn't read report file", e); - } + // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP + // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t. + S3Service s3Service = new RestS3Service(awsCredentials); + + // Create an S3Object based on a file, with Content-Length set automatically and + // Content-Type set based on the file's extension (using the Mimetypes utility class) + S3Object fileObject = new S3Object(key, report); + //logger.info("Created S3Object" + fileObject); + //logger.info("Uploading " + localFile + " to AWS bucket"); + S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject); + logger.debug("Uploaded to AWS: " + s3Object); + logger.info("Uploaded run statistics report to AWS S3"); + } catch ( S3ServiceException e ) { + exceptDuringRunReport("S3 exception occurred", e); + } catch ( NoSuchAlgorithmException e ) { + exceptDuringRunReport("Couldn't calculate MD5", e); + } catch ( IOException e ) { + exceptDuringRunReport("Couldn't read report file", e); } }