Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Menachem Fromer 2012-01-29 23:39:34 -05:00
commit d1aa5204d7
4 changed files with 79 additions and 79 deletions

View File

@ -72,7 +72,7 @@
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Dependencies for amazon.com S3 support -->
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
<!-- Dependencies for GridEngine -->
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>

View File

@ -264,22 +264,8 @@ public class GATKRunReport {
}
}
/**
* Opens the destination file and writes a gzipped version of the XML report there.
*
* @param destination
* @throws IOException
*/
private void postReportToFile(File destination) throws IOException {
BufferedOutputStream out =
new BufferedOutputStream(
new GZIPOutputStream(
new FileOutputStream(destination)));
try {
postReportToStream(out);
} finally {
out.close();
}
private final String getKey() {
return getID() + ".report.xml.gz";
}
/**
@ -288,16 +274,21 @@ public class GATKRunReport {
* That is, postReport() is guarenteed not to fail for any reason.
*/
private File postReportToLocalDisk(File rootDir) {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
final String filename = getKey();
final File destination = new File(rootDir, filename);
try {
postReportToFile(file);
logger.debug("Wrote report to " + file);
return file;
final BufferedOutputStream out = new BufferedOutputStream(
new GZIPOutputStream(
new FileOutputStream(destination)));
postReportToStream(out);
out.close();
logger.debug("Wrote report to " + destination);
return destination;
} catch ( Exception e ) {
// we catch everything, and no matter what eat the error
exceptDuringRunReport("Couldn't read report file", e);
file.delete();
destination.delete();
return null;
}
}
@ -305,42 +296,46 @@ public class GATKRunReport {
private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file
localFile.deleteOnExit();
try {
// stop us from printing the annoying, and meaningless, mime types warning
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
mimeTypeLogger.setLevel(Level.FATAL);
final String key = getKey();
logger.debug("Generating GATK report to AWS S3 with key " + key);
try {
// create an byte output stream so we can capture the output as a byte[]
final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096);
final OutputStream outputStream = new GZIPOutputStream(byteStream);
postReportToStream(outputStream);
outputStream.close();
final byte[] report = byteStream.toByteArray();
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
// are stored in an AWSCredentials object:
// stop us from printing the annoying, and meaningless, mime types warning
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
mimeTypeLogger.setLevel(Level.FATAL);
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
// are stored in an AWSCredentials object:
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Service s3Service = new RestS3Service(awsCredentials);
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(localFile);
//logger.info("Created S3Object" + fileObject);
//logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.debug("Uploaded to AWS: " + s3Object);
logger.info("Uploaded run statistics report to AWS S3");
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
}
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Service s3Service = new RestS3Service(awsCredentials);
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(key, report);
//logger.info("Created S3Object" + fileObject);
//logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.debug("Uploaded to AWS: " + s3Object);
logger.info("Uploaded run statistics report to AWS S3");
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
}
}

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -63,25 +62,26 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
referenceOrderedDataView = (RodLocusView)locusView;
// We keep processing while the next reference location is within the interval
GenomeLoc prevLoc = null;
while( locusView.hasNext() ) {
final AlignmentContext locus = locusView.next();
GenomeLoc location = locus.getLocation();
if(prevLoc != null) {
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
isActiveList.add( isActiveProb );
if( firstIsActiveStart == null ) {
firstIsActiveStart = fakeLoc;
}
}
}
}
dataProvider.getShard().getReadMetrics().incrementNumIterations();
if ( locus.hasExtendedEventPileup() ) {
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
// associated with the current site), we need to update the location. The updated location still starts
// at the current genomic position, but it has to span the length of the longest deletion (if any).
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
// the next call will not do anything to the view:
referenceView.expandBoundsToAccomodateLoc(location);
}
// create reference context. Note that if we have a pileup of "extended events", the context will
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
final ReferenceContext refContext = referenceView.getReferenceContext(location);
@ -95,7 +95,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
: ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
isActiveList.add( isActiveProb );
if( firstIsActiveStart == null ) {
firstIsActiveStart = locus.getLocation();
firstIsActiveStart = location;
}
}
@ -118,6 +118,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
}
}
prevLoc = location;
printProgress(dataProvider.getShard(), locus.getLocation());
}
@ -230,7 +231,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
final int MAX_ACTIVE_REGION = 200;
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
double maxVal = 0;
for( int jjj = Math.max( 0, iii-FILTER_SIZE); jjj < Math.min( activeList.size(), iii+FILTER_SIZE); jjj++ ) {
for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE); jjj++ ) {
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
}
filteredProbArray[iii] = maxVal;
@ -241,14 +242,18 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = 1; iii < filteredProbArray.length; iii++ ) {
final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD;
if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
returnList.add( new ActiveRegion(
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
curStatus = thisStatus;
curStart = iii;
}
}
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
if( curStart != filteredProbArray.length-1 ) {
returnList.add( new ActiveRegion(
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
}
return returnList;
}
}

View File

@ -17,7 +17,7 @@ public class QualityUtils {
private static double qualToErrorProbCache[] = new double[256];
static {
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i);
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
}
/**
@ -44,15 +44,15 @@ public class QualityUtils {
* Convert a quality score to a probability of error. This is the Phred-style
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
*
* @param qual a quality score (0-40)
* @return a probability (0.0-1.0)
* @param qual a quality score (0 - 255)
* @return a probability (0.0 - 1.0)
*/
static public double qualToErrorProbRaw(byte qual) {
static private double qualToErrorProbRaw(int qual) {
return Math.pow(10.0, ((double) qual)/-10.0);
}
static public double qualToErrorProb(byte qual) {
return qualToErrorProbCache[qual];
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
}
/**