Merge branch 'master' of ssh://chartl@ni.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
faba3dd530
6
ivy.xml
6
ivy.xml
|
|
@ -41,7 +41,7 @@
|
|||
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
||||
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
||||
<dependency org="colt" name="colt" rev="1.2.0"/>
|
||||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
||||
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
|
||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
||||
|
||||
|
|
@ -66,13 +66,13 @@
|
|||
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
|
||||
|
||||
<!-- Lucene core utilities -->
|
||||
<dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/>
|
||||
<!-- <dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/> -->
|
||||
|
||||
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
|
||||
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
||||
|
||||
<!-- Dependencies for amazon.com S3 support -->
|
||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
|
||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
|
||||
|
||||
<!-- Dependencies for GridEngine -->
|
||||
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
|
||||
|
|
|
|||
|
|
@ -74,16 +74,16 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
static private class SAMRecordState {
|
||||
SAMRecord read;
|
||||
int readOffset = -1; // how far are we offset from the start of the read bases?
|
||||
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
|
||||
int readOffset = -1; // how far are we offset from the start of the read bases?
|
||||
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
|
||||
|
||||
Cigar cigar = null;
|
||||
int cigarOffset = -1;
|
||||
CigarElement curElement = null;
|
||||
int nCigarElements = 0;
|
||||
|
||||
// how far are we into a single cigarElement
|
||||
int cigarElementCounter = -1;
|
||||
|
||||
int cigarElementCounter = -1; // how far are we into a single cigarElement
|
||||
|
||||
// The logical model for generating extended events is as follows: the "record state" implements the traversal
|
||||
// along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This
|
||||
|
|
@ -93,19 +93,19 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
|
||||
// events immediately preceding the current reference base).
|
||||
|
||||
boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases?
|
||||
// the only purpose of this flag is to shield away a few additional lines of code
|
||||
// when extended piles are not needed, it may not be even worth it...
|
||||
boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases?
|
||||
// the only purpose of this flag is to shield away a few additional lines of code
|
||||
// when extended piles are not needed, it may not be even worth it...
|
||||
|
||||
byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels)
|
||||
int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events
|
||||
byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the
|
||||
// current base on the ref. We use a counter-like variable here since clearing the indel event is
|
||||
// delayed by one base, so we need to remember how long ago we have seen the actual event
|
||||
byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels)
|
||||
int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events
|
||||
byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the
|
||||
// current base on the ref. We use a counter-like variable here since clearing the indel event is
|
||||
// delayed by one base, so we need to remember how long ago we have seen the actual event
|
||||
|
||||
int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the
|
||||
// event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly,
|
||||
// we cache it here mainly for convenience
|
||||
int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the
|
||||
// event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly,
|
||||
// we cache it here mainly for convenience
|
||||
|
||||
|
||||
public SAMRecordState(SAMRecord read, boolean extended) {
|
||||
|
|
@ -176,6 +176,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
||||
}
|
||||
|
||||
public CigarOperator peekForwardOnGenome() {
|
||||
return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ).getOperator();
|
||||
}
|
||||
|
||||
public CigarOperator stepForwardOnGenome() {
|
||||
// we enter this method with readOffset = index of the last processed base on the read
|
||||
// (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
|
||||
|
|
@ -237,6 +241,8 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
readOffset += curElement.getLength();
|
||||
break;
|
||||
case D: // deletion w.r.t. the reference
|
||||
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString());
|
||||
if (generateExtendedEvents) {
|
||||
if (cigarElementCounter == 1) {
|
||||
// generate an extended event only if we just stepped into the deletion (i.e. don't
|
||||
|
|
@ -399,9 +405,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
||||
final int eventLength = state.getEventLength();
|
||||
|
||||
// if (op != CigarOperator.N) // N's are never added to any pileup
|
||||
// continue;
|
||||
//
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref
|
||||
size++;
|
||||
ExtendedEventPileupElement pileupElement;
|
||||
|
|
@ -409,27 +415,26 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
nDeletions++;
|
||||
maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength());
|
||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength);
|
||||
}
|
||||
}
|
||||
else { // Insertion event
|
||||
nInsertions++;
|
||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases());
|
||||
}
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
|
||||
indelPile.add(pileupElement);
|
||||
}
|
||||
|
||||
// this read has no indel associated with the previous position on the ref. Criteria to include in the pileup are:
|
||||
// we only add reads that are not N's
|
||||
// we only include deletions to the pileup if the walker requests it
|
||||
else if ( (op != CigarOperator.N) && (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci())) {
|
||||
// this read has no indel so add it to the pileup as a NOEVENT:
|
||||
// a deletion that didn't start here (therefore, not an extended event)
|
||||
// we add (mis)matches as no events.
|
||||
else if (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci()) {
|
||||
size++;
|
||||
indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset));
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
|
||||
|
||||
if (state.getRead().getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
|
||||
}
|
||||
|
||||
if (indelPile.size() != 0)
|
||||
|
|
@ -455,26 +460,27 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
final SAMRecordState state = iterator.next(); // state object with the read/offset information
|
||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||
final CigarOperator nextOp = state.peekForwardOnGenome(); // next cigar operator
|
||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
||||
|
||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||
continue;
|
||||
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
|
||||
if (op == CigarOperator.D) {
|
||||
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
|
||||
int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset;
|
||||
pile.add(new PileupElement(read, leftAlignedStart, true));
|
||||
pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.I, false));
|
||||
size++;
|
||||
nDeletions++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
if (!filterBaseInRead(read, location.getStart())) {
|
||||
pile.add(new PileupElement(read, readOffset, false));
|
||||
pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.I, op == CigarOperator.S));
|
||||
size++;
|
||||
if (read.getMappingQuality() == 0)
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -264,22 +264,8 @@ public class GATKRunReport {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens the destination file and writes a gzipped version of the XML report there.
|
||||
*
|
||||
* @param destination
|
||||
* @throws IOException
|
||||
*/
|
||||
private void postReportToFile(File destination) throws IOException {
|
||||
BufferedOutputStream out =
|
||||
new BufferedOutputStream(
|
||||
new GZIPOutputStream(
|
||||
new FileOutputStream(destination)));
|
||||
try {
|
||||
postReportToStream(out);
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
private final String getKey() {
|
||||
return getID() + ".report.xml.gz";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -288,16 +274,21 @@ public class GATKRunReport {
|
|||
* That is, postReport() is guarenteed not to fail for any reason.
|
||||
*/
|
||||
private File postReportToLocalDisk(File rootDir) {
|
||||
String filename = getID() + ".report.xml.gz";
|
||||
File file = new File(rootDir, filename);
|
||||
final String filename = getKey();
|
||||
final File destination = new File(rootDir, filename);
|
||||
|
||||
try {
|
||||
postReportToFile(file);
|
||||
logger.debug("Wrote report to " + file);
|
||||
return file;
|
||||
final BufferedOutputStream out = new BufferedOutputStream(
|
||||
new GZIPOutputStream(
|
||||
new FileOutputStream(destination)));
|
||||
postReportToStream(out);
|
||||
out.close();
|
||||
logger.debug("Wrote report to " + destination);
|
||||
return destination;
|
||||
} catch ( Exception e ) {
|
||||
// we catch everything, and no matter what eat the error
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
file.delete();
|
||||
destination.delete();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -305,42 +296,46 @@ public class GATKRunReport {
|
|||
private void postReportToAWSS3() {
|
||||
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
|
||||
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
|
||||
File localFile = postReportToLocalDisk(new File("./"));
|
||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
||||
if ( localFile != null ) { // we succeeded in creating the local file
|
||||
localFile.deleteOnExit();
|
||||
try {
|
||||
// stop us from printing the annoying, and meaningless, mime types warning
|
||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||
mimeTypeLogger.setLevel(Level.FATAL);
|
||||
final String key = getKey();
|
||||
logger.debug("Generating GATK report to AWS S3 with key " + key);
|
||||
try {
|
||||
// create an byte output stream so we can capture the output as a byte[]
|
||||
final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096);
|
||||
final OutputStream outputStream = new GZIPOutputStream(byteStream);
|
||||
postReportToStream(outputStream);
|
||||
outputStream.close();
|
||||
final byte[] report = byteStream.toByteArray();
|
||||
|
||||
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
|
||||
// are stored in an AWSCredentials object:
|
||||
// stop us from printing the annoying, and meaningless, mime types warning
|
||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||
mimeTypeLogger.setLevel(Level.FATAL);
|
||||
|
||||
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
|
||||
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
|
||||
String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user
|
||||
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
|
||||
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
|
||||
// are stored in an AWSCredentials object:
|
||||
|
||||
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
|
||||
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
|
||||
S3Service s3Service = new RestS3Service(awsCredentials);
|
||||
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
|
||||
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
|
||||
String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user
|
||||
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
|
||||
|
||||
// Create an S3Object based on a file, with Content-Length set automatically and
|
||||
// Content-Type set based on the file's extension (using the Mimetypes utility class)
|
||||
S3Object fileObject = new S3Object(localFile);
|
||||
//logger.info("Created S3Object" + fileObject);
|
||||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||
logger.debug("Uploaded to AWS: " + s3Object);
|
||||
logger.info("Uploaded run statistics report to AWS S3");
|
||||
} catch ( S3ServiceException e ) {
|
||||
exceptDuringRunReport("S3 exception occurred", e);
|
||||
} catch ( NoSuchAlgorithmException e ) {
|
||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||
} catch ( IOException e ) {
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
}
|
||||
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
|
||||
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
|
||||
S3Service s3Service = new RestS3Service(awsCredentials);
|
||||
|
||||
// Create an S3Object based on a file, with Content-Length set automatically and
|
||||
// Content-Type set based on the file's extension (using the Mimetypes utility class)
|
||||
S3Object fileObject = new S3Object(key, report);
|
||||
//logger.info("Created S3Object" + fileObject);
|
||||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||
logger.debug("Uploaded to AWS: " + s3Object);
|
||||
logger.info("Uploaded run statistics report to AWS S3");
|
||||
} catch ( S3ServiceException e ) {
|
||||
exceptDuringRunReport("S3 exception occurred", e);
|
||||
} catch ( NoSuchAlgorithmException e ) {
|
||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||
} catch ( IOException e ) {
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,10 +14,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -54,7 +51,8 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
|
||||
int minStart = Integer.MAX_VALUE;
|
||||
final ArrayList<ActiveRegion> isActiveList = new ArrayList<ActiveRegion>();
|
||||
final ArrayList<Double> isActiveList = new ArrayList<Double>();
|
||||
GenomeLoc firstIsActiveStart = null;
|
||||
|
||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = null;
|
||||
|
|
@ -64,25 +62,26 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
referenceOrderedDataView = (RodLocusView)locusView;
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
GenomeLoc location = locus.getLocation();
|
||||
if(prevLoc != null) {
|
||||
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
|
||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||
if( initialIntervals.overlaps( fakeLoc ) ) {
|
||||
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
|
||||
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
||||
isActiveList.add( isActiveProb );
|
||||
if( firstIsActiveStart == null ) {
|
||||
firstIsActiveStart = fakeLoc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
if ( locus.hasExtendedEventPileup() ) {
|
||||
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
||||
// associated with the current site), we need to update the location. The updated location still starts
|
||||
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
||||
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
||||
|
||||
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
||||
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
||||
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
|
||||
// the next call will not do anything to the view:
|
||||
referenceView.expandBoundsToAccomodateLoc(location);
|
||||
}
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
|
@ -91,11 +90,15 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
if( initialIntervals.overlaps(location) ) {
|
||||
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
|
||||
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
if( initialIntervals.overlaps( location ) ) {
|
||||
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus )
|
||||
: ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
|
||||
isActiveList.add( isActiveProb );
|
||||
if( firstIsActiveStart == null ) {
|
||||
firstIsActiveStart = location;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||
for( final PileupElement p : locus.getBasePileup() ) {
|
||||
final SAMRecord read = p.getRead();
|
||||
|
|
@ -104,15 +107,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
}
|
||||
}
|
||||
|
||||
// If this is the last pileup for this shard then need to first do a special walker.isActive() call
|
||||
// and then calculate the minimum alignment start so that we know which active regions in the work queue are now safe to process
|
||||
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
||||
// which active regions in the work queue are now safe to process
|
||||
if( !locusView.hasNext() ) {
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
if( initialIntervals.overlaps(location) ) {
|
||||
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
|
||||
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
}
|
||||
|
||||
for( final PileupElement p : locus.getBasePileup() ) {
|
||||
final SAMRecord read = p.getRead();
|
||||
if( !myReads.contains(read) ) {
|
||||
|
|
@ -121,12 +118,13 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
|
||||
}
|
||||
}
|
||||
printProgress(dataProvider.getShard(),locus.getLocation());
|
||||
prevLoc = location;
|
||||
printProgress(dataProvider.getShard(), locus.getLocation());
|
||||
}
|
||||
|
||||
// Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
// add these blocks of work to the work queue
|
||||
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
|
||||
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList, firstIsActiveStart, activeRegionExtension );
|
||||
logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
if( walker.activeRegionOutStream == null ) {
|
||||
workQueue.addAll( activeRegions );
|
||||
|
|
@ -137,14 +135,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||
if( !workQueue.isEmpty() ) {
|
||||
while( workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig()) ) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||
}
|
||||
while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -184,7 +179,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read );
|
||||
otherRegionToTest.add( (GATKSAMRecord) read );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -218,30 +213,46 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
|
||||
// integrate active regions into contiguous chunks with identical active status
|
||||
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<ActiveRegion> activeList ) {
|
||||
// band-pass filter the list of isActive probabilities and turn into active regions
|
||||
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<Double> activeList, final GenomeLoc firstIsActiveStart, final int activeRegionExtension ) {
|
||||
|
||||
final double ACTIVE_PROB_THRESHOLD = 0.2;
|
||||
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
||||
if( activeList.size() == 0 ) {
|
||||
return returnList;
|
||||
} else if( activeList.size() == 1 ) {
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(activeList.get(0).getLocation().getContig(), activeList.get(0).getLocation().getStart(), activeList.get(0).getLocation().getStart()),
|
||||
activeList.get(0).isActive, engine.getGenomeLocParser(), activeList.get(0).getExtension() ) );
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart(), firstIsActiveStart.getStart()),
|
||||
activeList.get(0) > ACTIVE_PROB_THRESHOLD, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
return returnList;
|
||||
} else {
|
||||
ActiveRegion prevLocus = activeList.get(0);
|
||||
ActiveRegion startLocus = prevLocus;
|
||||
for( final ActiveRegion thisLocus : activeList ) {
|
||||
if( prevLocus.isActive != thisLocus.isActive || !prevLocus.getLocation().contiguousP( thisLocus.getLocation() ) ) {
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
|
||||
prevLocus.isActive, engine.getGenomeLocParser(), startLocus.getExtension() ) );
|
||||
startLocus = thisLocus;
|
||||
final Double[] activeProbArray = activeList.toArray(new Double[activeList.size()]);
|
||||
final double[] filteredProbArray = new double[activeProbArray.length];
|
||||
final int FILTER_SIZE = 10;
|
||||
final int MAX_ACTIVE_REGION = 200;
|
||||
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||
double maxVal = 0;
|
||||
for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE); jjj++ ) {
|
||||
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
|
||||
}
|
||||
prevLocus = thisLocus;
|
||||
filteredProbArray[iii] = maxVal;
|
||||
}
|
||||
// output the last region if necessary
|
||||
if( startLocus != prevLocus ) {
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
|
||||
prevLocus.isActive, engine.getGenomeLocParser(), startLocus.getExtension() ) );
|
||||
|
||||
boolean curStatus = filteredProbArray[0] > ACTIVE_PROB_THRESHOLD;
|
||||
int curStart = 0;
|
||||
for(int iii = 1; iii < filteredProbArray.length; iii++ ) {
|
||||
final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD;
|
||||
if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
|
||||
returnList.add( new ActiveRegion(
|
||||
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
|
||||
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
curStatus = thisStatus;
|
||||
curStart = iii;
|
||||
}
|
||||
}
|
||||
if( curStart != filteredProbArray.length-1 ) {
|
||||
returnList.add( new ActiveRegion(
|
||||
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
|
||||
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||
}
|
||||
return returnList;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,8 +73,8 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
return false;
|
||||
}
|
||||
|
||||
// Determine active status over the AlignmentContext
|
||||
public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
// Determine probability of active status over the AlignmentContext
|
||||
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
|
||||
// Map over the ActiveRegion
|
||||
public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
|
||||
public class BAQedPileupElement extends PileupElement {
|
||||
public BAQedPileupElement( final PileupElement PE ) {
|
||||
super(PE.getRead(), PE.getOffset(), PE.isDeletion());
|
||||
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeInsertion(), PE.isSoftClipped());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -169,9 +169,11 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
private VariantAnnotatorEngine annotationEngine;
|
||||
|
||||
// enable deletions in the pileup
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
// enable extended events for indels
|
||||
@Override
|
||||
public boolean generateExtendedEvents() {
|
||||
return (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
private final void throwError(ReferenceContext ref, String message) {
|
||||
throw new StingException(String.format("Site %s failed: %s", ref, message));
|
||||
throw new StingException(String.format("Site %s failed: %s", ref.getLocus(), message));
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
@ -92,13 +92,13 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
|||
contigName = locusContigName;
|
||||
ReferenceSequence refSeq = uncachedRef.getSequence(contigName);
|
||||
contigStart = 1;
|
||||
contigEnd = contigStart + refSeq.length();
|
||||
contigEnd = contigStart + refSeq.length() - 1;
|
||||
uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases();
|
||||
logger.warn(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
|
||||
logger.info(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
|
||||
}
|
||||
|
||||
final byte refBase = ref.getBase();
|
||||
if (! ( BaseUtils.isRegularBase(refBase) || BaseUtils.isNBase(refBase) ) )
|
||||
if (! ( BaseUtils.isRegularBase(refBase) || isExtendFastaBase(refBase) ) )
|
||||
throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase));
|
||||
|
||||
// check bases are equal
|
||||
|
|
@ -114,6 +114,28 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
private static final boolean isExtendFastaBase(final byte b) {
|
||||
switch ( b ) {
|
||||
case 'U':
|
||||
case 'R':
|
||||
case 'Y':
|
||||
case 'K':
|
||||
case 'M':
|
||||
case 'S':
|
||||
case 'W':
|
||||
case 'B':
|
||||
case 'D':
|
||||
case 'H':
|
||||
case 'V':
|
||||
case 'N':
|
||||
case 'X':
|
||||
case '-':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
}
|
||||
}
|
||||
else {
|
||||
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
|
||||
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,22 +25,20 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -557,7 +555,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
// Look for this sample in the all vcs of the comp ROD track.
|
||||
boolean foundVariant = false;
|
||||
for (VariantContext compVC : compVCs) {
|
||||
if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) {
|
||||
if (haveSameGenotypes(g, compVC.getGenotype(g.getSampleName()))) {
|
||||
foundVariant = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ public class QualityUtils {
|
|||
|
||||
private static double qualToErrorProbCache[] = new double[256];
|
||||
static {
|
||||
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i);
|
||||
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -44,15 +44,15 @@ public class QualityUtils {
|
|||
* Convert a quality score to a probability of error. This is the Phred-style
|
||||
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
||||
*
|
||||
* @param qual a quality score (0-40)
|
||||
* @return a probability (0.0-1.0)
|
||||
* @param qual a quality score (0 - 255)
|
||||
* @return a probability (0.0 - 1.0)
|
||||
*/
|
||||
static public double qualToErrorProbRaw(byte qual) {
|
||||
static private double qualToErrorProbRaw(int qual) {
|
||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||
}
|
||||
|
||||
static public double qualToErrorProb(byte qual) {
|
||||
return qualToErrorProbCache[qual];
|
||||
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
for (int i = 0; i < reads.size(); i++) {
|
||||
GATKSAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
|
||||
pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
|
||||
}
|
||||
|
||||
return pileup;
|
||||
|
|
@ -196,7 +196,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
|
||||
for (GATKSAMRecord read : reads) {
|
||||
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
|
||||
pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
|
||||
}
|
||||
|
||||
return pileup;
|
||||
|
|
@ -204,7 +204,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
|
||||
protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
|
||||
|
||||
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion);
|
||||
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped);
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ import java.util.Arrays;
|
|||
* Time: 2:57:55 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
// Extended events are slated for removal
|
||||
public class ExtendedEventPileupElement extends PileupElement {
|
||||
public enum Type {
|
||||
NOEVENT, DELETION, INSERTION
|
||||
|
|
@ -46,7 +48,7 @@ public class ExtendedEventPileupElement extends PileupElement {
|
|||
|
||||
|
||||
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) {
|
||||
super(read, offset, type == Type.DELETION);
|
||||
super(read, offset, type == Type.DELETION, false, false); // extended events are slated for removal
|
||||
this.read = read;
|
||||
this.offset = offset;
|
||||
this.eventLength = eventLength;
|
||||
|
|
|
|||
|
|
@ -23,47 +23,46 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
protected final GATKSAMRecord read;
|
||||
protected final int offset;
|
||||
protected final boolean isDeletion;
|
||||
protected final boolean isBeforeInsertion;
|
||||
protected final boolean isSoftClipped;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new pileup element.
|
||||
*
|
||||
* @param read the read we are adding to the pileup
|
||||
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
||||
* @param isDeletion whether or not this base is a deletion
|
||||
* @param read the read we are adding to the pileup
|
||||
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
||||
* @param isDeletion whether or not this base is a deletion
|
||||
* @param isBeforeInsertion whether or not this base is before an insertion
|
||||
* @param isSoftClipped whether or not this base was softclipped
|
||||
*/
|
||||
@Requires({
|
||||
"read != null",
|
||||
"offset >= -1",
|
||||
"offset <= read.getReadLength()"})
|
||||
public PileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
||||
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeInsertion, final boolean isSoftClipped) {
|
||||
if (offset < 0 && isDeletion)
|
||||
throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset");
|
||||
|
||||
this.read = read;
|
||||
this.offset = offset;
|
||||
this.isDeletion = isDeletion;
|
||||
this.isBeforeInsertion = isBeforeInsertion;
|
||||
this.isSoftClipped = isSoftClipped;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Creates a NON DELETION pileup element.
|
||||
// *
|
||||
// * use this constructor only for insertions and matches/mismatches.
|
||||
// * @param read the read we are adding to the pileup
|
||||
// * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
||||
// */
|
||||
// @Requires({
|
||||
// "read != null",
|
||||
// "offset >= -1",
|
||||
// "offset <= read.getReadLength()"})
|
||||
// public PileupElement( GATKSAMRecord read, int offset ) {
|
||||
// this(read, offset, false);
|
||||
// }
|
||||
//
|
||||
public boolean isDeletion() {
|
||||
return isDeletion;
|
||||
}
|
||||
|
||||
public boolean isBeforeInsertion() {
|
||||
return isBeforeInsertion;
|
||||
}
|
||||
|
||||
public boolean isSoftClipped() {
|
||||
return isSoftClipped;
|
||||
}
|
||||
|
||||
public boolean isInsertionAtBeginningOfRead() {
|
||||
return offset == -1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
}
|
||||
|
||||
@Override
|
||||
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
||||
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped) {
|
||||
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
|
|||
}
|
||||
|
||||
@Override
|
||||
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
||||
return new PileupElement(read, offset, isDeletion);
|
||||
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped) {
|
||||
return new PileupElement(read, offset, isDeletion, isBeforeInsertion, isSoftClipped);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -361,10 +361,10 @@ public class ArtificialSAMUtils {
|
|||
final GATKSAMRecord left = pair.get(0);
|
||||
final GATKSAMRecord right = pair.get(1);
|
||||
|
||||
pileupElements.add(new PileupElement(left, pos - leftStart, false));
|
||||
pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false));
|
||||
|
||||
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
|
||||
pileupElements.add(new PileupElement(right, pos - rightStart, false));
|
||||
pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -43,7 +43,10 @@ import java.util.Map;
|
|||
*
|
||||
*/
|
||||
public class GATKSAMRecord extends BAMRecord {
|
||||
public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
|
||||
// ReduceReads specific attribute tags
|
||||
public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool
|
||||
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OP"; // reads that are clipped may use this attribute to keep track of their original alignment start
|
||||
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end
|
||||
|
||||
// the SAMRecord data we're caching
|
||||
private String mReadString = null;
|
||||
|
|
@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the original alignment start of a previously clipped read.
|
||||
*
|
||||
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||
*
|
||||
* @return the alignment start of a read before it was clipped
|
||||
*/
|
||||
public int getOriginalAlignmentStart() {
|
||||
int originalAlignmentStart = getUnclippedStart();
|
||||
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT);
|
||||
if (alignmentShift != null)
|
||||
originalAlignmentStart += alignmentShift;
|
||||
return originalAlignmentStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the original alignment end of a previously clipped read.
|
||||
*
|
||||
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||
*
|
||||
* @return the alignment end of a read before it was clipped
|
||||
*/
|
||||
public int getOriginalAlignmentEnd() {
|
||||
int originalAlignmentEnd = getUnclippedEnd();
|
||||
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT);
|
||||
if (alignmentShift != null)
|
||||
originalAlignmentEnd -= alignmentShift;
|
||||
return originalAlignmentEnd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an empty GATKSAMRecord with the read's header, read group and mate
|
||||
* information, but empty (not-null) fields:
|
||||
|
|
@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
return emptyRead;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shallow copy of everything, except for the attribute list and the temporary attributes.
|
||||
* A new list of the attributes is created for both, but the attributes themselves are copied by reference.
|
||||
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
|
||||
*
|
||||
* @return a shallow copy of the GATKSAMRecord
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
@Override
|
||||
public Object clone() throws CloneNotSupportedException {
|
||||
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
|
||||
if (temporaryAttributes != null) {
|
||||
for (Object attribute : temporaryAttributes.keySet())
|
||||
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
|
||||
}
|
||||
return clone;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER",
|
||||
1,
|
||||
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
|
||||
Arrays.asList("929bbb96381541c162dc7e5462e26ea2")
|
||||
);
|
||||
|
||||
executeTest("testDiscordance--" + testFile, spec);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
public class GATKSAMRecordUnitTest extends BaseTest {
|
||||
GATKSAMRecord read, reducedRead;
|
||||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?";
|
||||
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
|
||||
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setReadBases(new String(BASES).getBytes());
|
||||
read.setBaseQualityString(new String(QUALS));
|
||||
|
||||
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
||||
reducedRead.setReadBases(BASES.getBytes());
|
||||
reducedRead.setBaseQualityString(QUALS);
|
||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedReads() {
|
||||
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
|
||||
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
|
||||
|
||||
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
|
||||
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
||||
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedReadPileupElement() {
|
||||
PileupElement readp = new PileupElement(read, 0, false, false, false);
|
||||
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false);
|
||||
|
||||
Assert.assertFalse(readp.getRead().isReducedRead());
|
||||
|
||||
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
|
||||
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
||||
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetOriginalAlignments() {
|
||||
final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
|
||||
final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 };
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
|
||||
|
||||
// A regular read with all matches
|
||||
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
||||
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
|
||||
|
||||
// Alignment start shifted
|
||||
int alignmentShift = 2;
|
||||
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift);
|
||||
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
|
||||
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
|
||||
|
||||
// Both alignments shifted
|
||||
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift);
|
||||
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
|
||||
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
||||
|
||||
// Alignment end shifted
|
||||
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null);
|
||||
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
||||
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,57 +1,11 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
public class ReadUtilsUnitTest extends BaseTest {
|
||||
GATKSAMRecord read, reducedRead;
|
||||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?";
|
||||
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
|
||||
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
||||
|
||||
@BeforeTest
|
||||
public void init() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setReadBases(new String(BASES).getBytes());
|
||||
read.setBaseQualityString(new String(QUALS));
|
||||
|
||||
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
||||
reducedRead.setReadBases(BASES.getBytes());
|
||||
reducedRead.setBaseQualityString(QUALS);
|
||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedReads() {
|
||||
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
|
||||
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
|
||||
|
||||
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
|
||||
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
||||
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedReadPileupElement() {
|
||||
PileupElement readp = new PileupElement(read, 0, false);
|
||||
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false);
|
||||
|
||||
Assert.assertFalse(readp.getRead().isReducedRead());
|
||||
|
||||
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
|
||||
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
||||
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAdaptorBoundary() {
|
||||
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
||||
|
|
|
|||
|
|
@ -25,9 +25,6 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
|
||||
var noIndels: Boolean = false
|
||||
|
||||
@Argument(shortName="LOCAL_ET", doc="Doesn't use the AWS S3 storage for ET option", required=false)
|
||||
var LOCAL_ET: Boolean = false
|
||||
|
||||
@Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false)
|
||||
var minimumBaseQuality: Int = -1
|
||||
|
||||
|
|
@ -203,7 +200,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||
logging_level = "INFO";
|
||||
memoryLimit = 4;
|
||||
phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3
|
||||
phone_home = GATKRunReport.PhoneHomeOption.NO_ET
|
||||
}
|
||||
|
||||
def bai(bam: File) = new File(bam + ".bai")
|
||||
|
|
|
|||
Loading…
Reference in New Issue