Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Menachem Fromer 2012-01-29 23:39:34 -05:00
commit d1aa5204d7
4 changed files with 79 additions and 79 deletions

View File

@ -72,7 +72,7 @@
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/> <dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Dependencies for amazon.com S3 support --> <!-- Dependencies for amazon.com S3 support -->
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/> <dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
<!-- Dependencies for GridEngine --> <!-- Dependencies for GridEngine -->
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/> <dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>

View File

@ -264,22 +264,8 @@ public class GATKRunReport {
} }
} }
/** private final String getKey() {
* Opens the destination file and writes a gzipped version of the XML report there. return getID() + ".report.xml.gz";
*
* @param destination
* @throws IOException
*/
private void postReportToFile(File destination) throws IOException {
BufferedOutputStream out =
new BufferedOutputStream(
new GZIPOutputStream(
new FileOutputStream(destination)));
try {
postReportToStream(out);
} finally {
out.close();
}
} }
/** /**
@ -288,16 +274,21 @@ public class GATKRunReport {
* That is, postReport() is guarenteed not to fail for any reason. * That is, postReport() is guarenteed not to fail for any reason.
*/ */
private File postReportToLocalDisk(File rootDir) { private File postReportToLocalDisk(File rootDir) {
String filename = getID() + ".report.xml.gz"; final String filename = getKey();
File file = new File(rootDir, filename); final File destination = new File(rootDir, filename);
try { try {
postReportToFile(file); final BufferedOutputStream out = new BufferedOutputStream(
logger.debug("Wrote report to " + file); new GZIPOutputStream(
return file; new FileOutputStream(destination)));
postReportToStream(out);
out.close();
logger.debug("Wrote report to " + destination);
return destination;
} catch ( Exception e ) { } catch ( Exception e ) {
// we catch everything, and no matter what eat the error // we catch everything, and no matter what eat the error
exceptDuringRunReport("Couldn't read report file", e); exceptDuringRunReport("Couldn't read report file", e);
file.delete(); destination.delete();
return null; return null;
} }
} }
@ -305,42 +296,46 @@ public class GATKRunReport {
private void postReportToAWSS3() { private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html // modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
this.hostName = Utils.resolveHostname(); // we want to fill in the host name this.hostName = Utils.resolveHostname(); // we want to fill in the host name
File localFile = postReportToLocalDisk(new File("./")); final String key = getKey();
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile); logger.debug("Generating GATK report to AWS S3 with key " + key);
if ( localFile != null ) { // we succeeded in creating the local file try {
localFile.deleteOnExit(); // create an byte output stream so we can capture the output as a byte[]
try { final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096);
// stop us from printing the annoying, and meaningless, mime types warning final OutputStream outputStream = new GZIPOutputStream(byteStream);
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class); postReportToStream(outputStream);
mimeTypeLogger.setLevel(Level.FATAL); outputStream.close();
final byte[] report = byteStream.toByteArray();
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials // stop us from printing the annoying, and meaningless, mime types warning
// are stored in an AWSCredentials object: Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
mimeTypeLogger.setLevel(Level.FATAL);
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket // Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user // are stored in an AWSCredentials object:
String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP // IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t. String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
S3Service s3Service = new RestS3Service(awsCredentials); String awsSecretKey = "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"; // GATK AWS user
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
// Create an S3Object based on a file, with Content-Length set automatically and // To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
// Content-Type set based on the file's extension (using the Mimetypes utility class) // implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
S3Object fileObject = new S3Object(localFile); S3Service s3Service = new RestS3Service(awsCredentials);
//logger.info("Created S3Object" + fileObject);
//logger.info("Uploading " + localFile + " to AWS bucket"); // Create an S3Object based on a file, with Content-Length set automatically and
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject); // Content-Type set based on the file's extension (using the Mimetypes utility class)
logger.debug("Uploaded to AWS: " + s3Object); S3Object fileObject = new S3Object(key, report);
logger.info("Uploaded run statistics report to AWS S3"); //logger.info("Created S3Object" + fileObject);
} catch ( S3ServiceException e ) { //logger.info("Uploading " + localFile + " to AWS bucket");
exceptDuringRunReport("S3 exception occurred", e); S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
} catch ( NoSuchAlgorithmException e ) { logger.debug("Uploaded to AWS: " + s3Object);
exceptDuringRunReport("Couldn't calculate MD5", e); logger.info("Uploaded run statistics report to AWS S3");
} catch ( IOException e ) { } catch ( S3ServiceException e ) {
exceptDuringRunReport("Couldn't read report file", e); exceptDuringRunReport("S3 exception occurred", e);
} } catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
} }
} }

View File

@ -10,7 +10,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -63,25 +62,26 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
referenceOrderedDataView = (RodLocusView)locusView; referenceOrderedDataView = (RodLocusView)locusView;
// We keep processing while the next reference location is within the interval // We keep processing while the next reference location is within the interval
GenomeLoc prevLoc = null;
while( locusView.hasNext() ) { while( locusView.hasNext() ) {
final AlignmentContext locus = locusView.next(); final AlignmentContext locus = locusView.next();
GenomeLoc location = locus.getLocation(); GenomeLoc location = locus.getLocation();
if(prevLoc != null) {
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
isActiveList.add( isActiveProb );
if( firstIsActiveStart == null ) {
firstIsActiveStart = fakeLoc;
}
}
}
}
dataProvider.getShard().getReadMetrics().incrementNumIterations(); dataProvider.getShard().getReadMetrics().incrementNumIterations();
if ( locus.hasExtendedEventPileup() ) {
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
// associated with the current site), we need to update the location. The updated location still starts
// at the current genomic position, but it has to span the length of the longest deletion (if any).
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
// the next call will not do anything to the view:
referenceView.expandBoundsToAccomodateLoc(location);
}
// create reference context. Note that if we have a pileup of "extended events", the context will // create reference context. Note that if we have a pileup of "extended events", the context will
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
final ReferenceContext refContext = referenceView.getReferenceContext(location); final ReferenceContext refContext = referenceView.getReferenceContext(location);
@ -95,7 +95,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
: ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) ); : ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
isActiveList.add( isActiveProb ); isActiveList.add( isActiveProb );
if( firstIsActiveStart == null ) { if( firstIsActiveStart == null ) {
firstIsActiveStart = locus.getLocation(); firstIsActiveStart = location;
} }
} }
@ -118,6 +118,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); } if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
} }
} }
prevLoc = location;
printProgress(dataProvider.getShard(), locus.getLocation()); printProgress(dataProvider.getShard(), locus.getLocation());
} }
@ -230,7 +231,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
final int MAX_ACTIVE_REGION = 200; final int MAX_ACTIVE_REGION = 200;
for( int iii = 0; iii < activeProbArray.length; iii++ ) { for( int iii = 0; iii < activeProbArray.length; iii++ ) {
double maxVal = 0; double maxVal = 0;
for( int jjj = Math.max( 0, iii-FILTER_SIZE); jjj < Math.min( activeList.size(), iii+FILTER_SIZE); jjj++ ) { for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE); jjj++ ) {
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; } if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
} }
filteredProbArray[iii] = maxVal; filteredProbArray[iii] = maxVal;
@ -241,14 +242,18 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = 1; iii < filteredProbArray.length; iii++ ) { for(int iii = 1; iii < filteredProbArray.length; iii++ ) {
final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD; final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD;
if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) { if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)), returnList.add( new ActiveRegion(
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
curStatus = thisStatus; curStatus = thisStatus;
curStart = iii; curStart = iii;
} }
} }
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)), if( curStart != filteredProbArray.length-1 ) {
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) ); returnList.add( new ActiveRegion(
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
}
return returnList; return returnList;
} }
} }

View File

@ -17,7 +17,7 @@ public class QualityUtils {
private static double qualToErrorProbCache[] = new double[256]; private static double qualToErrorProbCache[] = new double[256];
static { static {
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i); for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
} }
/** /**
@ -44,15 +44,15 @@ public class QualityUtils {
* Convert a quality score to a probability of error. This is the Phred-style * Convert a quality score to a probability of error. This is the Phred-style
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same). * conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
* *
* @param qual a quality score (0-40) * @param qual a quality score (0 - 255)
* @return a probability (0.0-1.0) * @return a probability (0.0 - 1.0)
*/ */
static public double qualToErrorProbRaw(byte qual) { static private double qualToErrorProbRaw(int qual) {
return Math.pow(10.0, ((double) qual)/-10.0); return Math.pow(10.0, ((double) qual)/-10.0);
} }
static public double qualToErrorProb(byte qual) { static public double qualToErrorProb(byte qual) {
return qualToErrorProbCache[qual]; return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
} }
/** /**