Merge branch 'master' of ssh://chartl@ni.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
faba3dd530
6
ivy.xml
6
ivy.xml
|
|
@ -41,7 +41,7 @@
|
||||||
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
||||||
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
||||||
<dependency org="colt" name="colt" rev="1.2.0"/>
|
<dependency org="colt" name="colt" rev="1.2.0"/>
|
||||||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
|
||||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||||
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
||||||
|
|
||||||
|
|
@ -66,13 +66,13 @@
|
||||||
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
|
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
|
||||||
|
|
||||||
<!-- Lucene core utilities -->
|
<!-- Lucene core utilities -->
|
||||||
<dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/>
|
<!-- <dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/> -->
|
||||||
|
|
||||||
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
|
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
|
||||||
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
||||||
|
|
||||||
<!-- Dependencies for amazon.com S3 support -->
|
<!-- Dependencies for amazon.com S3 support -->
|
||||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
|
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.1"/>
|
||||||
|
|
||||||
<!-- Dependencies for GridEngine -->
|
<!-- Dependencies for GridEngine -->
|
||||||
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
|
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
|
||||||
|
|
|
||||||
|
|
@ -74,16 +74,16 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
|
|
||||||
static private class SAMRecordState {
|
static private class SAMRecordState {
|
||||||
SAMRecord read;
|
SAMRecord read;
|
||||||
int readOffset = -1; // how far are we offset from the start of the read bases?
|
int readOffset = -1; // how far are we offset from the start of the read bases?
|
||||||
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
|
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
|
||||||
|
|
||||||
Cigar cigar = null;
|
Cigar cigar = null;
|
||||||
int cigarOffset = -1;
|
int cigarOffset = -1;
|
||||||
CigarElement curElement = null;
|
CigarElement curElement = null;
|
||||||
int nCigarElements = 0;
|
int nCigarElements = 0;
|
||||||
|
|
||||||
// how far are we into a single cigarElement
|
|
||||||
int cigarElementCounter = -1;
|
int cigarElementCounter = -1; // how far are we into a single cigarElement
|
||||||
|
|
||||||
// The logical model for generating extended events is as follows: the "record state" implements the traversal
|
// The logical model for generating extended events is as follows: the "record state" implements the traversal
|
||||||
// along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This
|
// along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This
|
||||||
|
|
@ -93,19 +93,19 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
|
// stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
|
||||||
// events immediately preceding the current reference base).
|
// events immediately preceding the current reference base).
|
||||||
|
|
||||||
boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases?
|
boolean generateExtendedEvents = true; // should we generate an additional, special pile for indels between the ref bases?
|
||||||
// the only purpose of this flag is to shield away a few additional lines of code
|
// the only purpose of this flag is to shield away a few additional lines of code
|
||||||
// when extended piles are not needed, it may not be even worth it...
|
// when extended piles are not needed, it may not be even worth it...
|
||||||
|
|
||||||
byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels)
|
byte[] insertedBases = null; // remember full inserted sequence if we are generating piles of extended events (indels)
|
||||||
int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events
|
int eventLength = -1; // will be set to the length of insertion/deletion if we are generating piles of extended events
|
||||||
byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the
|
byte eventDelayedFlag = 0; // will be set to non-0 if there was an event (indel) right before the
|
||||||
// current base on the ref. We use a counter-like variable here since clearing the indel event is
|
// current base on the ref. We use a counter-like variable here since clearing the indel event is
|
||||||
// delayed by one base, so we need to remember how long ago we have seen the actual event
|
// delayed by one base, so we need to remember how long ago we have seen the actual event
|
||||||
|
|
||||||
int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the
|
int eventStart = -1; // where on the read the extended event starts (i.e. the last position on the read prior to the
|
||||||
// event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly,
|
// event, or -1 if alignment starts with an insertion); this one is easy to recompute on the fly,
|
||||||
// we cache it here mainly for convenience
|
// we cache it here mainly for convenience
|
||||||
|
|
||||||
|
|
||||||
public SAMRecordState(SAMRecord read, boolean extended) {
|
public SAMRecordState(SAMRecord read, boolean extended) {
|
||||||
|
|
@ -176,6 +176,10 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CigarOperator peekForwardOnGenome() {
|
||||||
|
return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ).getOperator();
|
||||||
|
}
|
||||||
|
|
||||||
public CigarOperator stepForwardOnGenome() {
|
public CigarOperator stepForwardOnGenome() {
|
||||||
// we enter this method with readOffset = index of the last processed base on the read
|
// we enter this method with readOffset = index of the last processed base on the read
|
||||||
// (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
|
// (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
|
||||||
|
|
@ -237,6 +241,8 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
readOffset += curElement.getLength();
|
readOffset += curElement.getLength();
|
||||||
break;
|
break;
|
||||||
case D: // deletion w.r.t. the reference
|
case D: // deletion w.r.t. the reference
|
||||||
|
if (readOffset < 0) // we don't want reads starting with deletion, this is a malformed cigar string
|
||||||
|
throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString());
|
||||||
if (generateExtendedEvents) {
|
if (generateExtendedEvents) {
|
||||||
if (cigarElementCounter == 1) {
|
if (cigarElementCounter == 1) {
|
||||||
// generate an extended event only if we just stepped into the deletion (i.e. don't
|
// generate an extended event only if we just stepped into the deletion (i.e. don't
|
||||||
|
|
@ -399,9 +405,9 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
||||||
final int eventLength = state.getEventLength();
|
final int eventLength = state.getEventLength();
|
||||||
|
|
||||||
// if (op != CigarOperator.N) // N's are never added to any pileup
|
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||||
// continue;
|
continue;
|
||||||
//
|
|
||||||
if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref
|
if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref
|
||||||
size++;
|
size++;
|
||||||
ExtendedEventPileupElement pileupElement;
|
ExtendedEventPileupElement pileupElement;
|
||||||
|
|
@ -409,27 +415,26 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
nDeletions++;
|
nDeletions++;
|
||||||
maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength());
|
maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength());
|
||||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength);
|
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength);
|
||||||
}
|
}
|
||||||
else { // Insertion event
|
else { // Insertion event
|
||||||
nInsertions++;
|
nInsertions++;
|
||||||
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases());
|
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases());
|
||||||
}
|
}
|
||||||
|
if (read.getMappingQuality() == 0)
|
||||||
|
nMQ0Reads++;
|
||||||
|
|
||||||
indelPile.add(pileupElement);
|
indelPile.add(pileupElement);
|
||||||
}
|
}
|
||||||
|
|
||||||
// this read has no indel associated with the previous position on the ref. Criteria to include in the pileup are:
|
// this read has no indel so add it to the pileup as a NOEVENT:
|
||||||
// we only add reads that are not N's
|
// a deletion that didn't start here (therefore, not an extended event)
|
||||||
// we only include deletions to the pileup if the walker requests it
|
// we add (mis)matches as no events.
|
||||||
else if ( (op != CigarOperator.N) && (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci())) {
|
else if (op != CigarOperator.D || readInfo.includeReadsWithDeletionAtLoci()) {
|
||||||
size++;
|
size++;
|
||||||
indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset));
|
indelPile.add(new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), readOffset));
|
||||||
|
if (read.getMappingQuality() == 0)
|
||||||
|
nMQ0Reads++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (state.getRead().getMappingQuality() == 0)
|
|
||||||
nMQ0Reads++;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (indelPile.size() != 0)
|
if (indelPile.size() != 0)
|
||||||
|
|
@ -455,26 +460,27 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
final SAMRecordState state = iterator.next(); // state object with the read/offset information
|
final SAMRecordState state = iterator.next(); // state object with the read/offset information
|
||||||
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
|
||||||
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
|
||||||
|
final CigarOperator nextOp = state.peekForwardOnGenome(); // next cigar operator
|
||||||
final int readOffset = state.getReadOffset(); // the base offset on this read
|
final int readOffset = state.getReadOffset(); // the base offset on this read
|
||||||
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
|
|
||||||
|
|
||||||
if (op == CigarOperator.N) // N's are never added to any pileup
|
if (op == CigarOperator.N) // N's are never added to any pileup
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (read.getMappingQuality() == 0)
|
|
||||||
nMQ0Reads++;
|
|
||||||
|
|
||||||
if (op == CigarOperator.D) {
|
if (op == CigarOperator.D) {
|
||||||
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
|
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
|
||||||
int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset;
|
pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.I, false));
|
||||||
pile.add(new PileupElement(read, leftAlignedStart, true));
|
|
||||||
size++;
|
size++;
|
||||||
nDeletions++;
|
nDeletions++;
|
||||||
|
if (read.getMappingQuality() == 0)
|
||||||
|
nMQ0Reads++;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
if (!filterBaseInRead(read, location.getStart())) {
|
if (!filterBaseInRead(read, location.getStart())) {
|
||||||
pile.add(new PileupElement(read, readOffset, false));
|
pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.I, op == CigarOperator.S));
|
||||||
size++;
|
size++;
|
||||||
|
if (read.getMappingQuality() == 0)
|
||||||
|
nMQ0Reads++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -264,22 +264,8 @@ public class GATKRunReport {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private final String getKey() {
|
||||||
* Opens the destination file and writes a gzipped version of the XML report there.
|
return getID() + ".report.xml.gz";
|
||||||
*
|
|
||||||
* @param destination
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private void postReportToFile(File destination) throws IOException {
|
|
||||||
BufferedOutputStream out =
|
|
||||||
new BufferedOutputStream(
|
|
||||||
new GZIPOutputStream(
|
|
||||||
new FileOutputStream(destination)));
|
|
||||||
try {
|
|
||||||
postReportToStream(out);
|
|
||||||
} finally {
|
|
||||||
out.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -288,16 +274,21 @@ public class GATKRunReport {
|
||||||
* That is, postReport() is guarenteed not to fail for any reason.
|
* That is, postReport() is guarenteed not to fail for any reason.
|
||||||
*/
|
*/
|
||||||
private File postReportToLocalDisk(File rootDir) {
|
private File postReportToLocalDisk(File rootDir) {
|
||||||
String filename = getID() + ".report.xml.gz";
|
final String filename = getKey();
|
||||||
File file = new File(rootDir, filename);
|
final File destination = new File(rootDir, filename);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
postReportToFile(file);
|
final BufferedOutputStream out = new BufferedOutputStream(
|
||||||
logger.debug("Wrote report to " + file);
|
new GZIPOutputStream(
|
||||||
return file;
|
new FileOutputStream(destination)));
|
||||||
|
postReportToStream(out);
|
||||||
|
out.close();
|
||||||
|
logger.debug("Wrote report to " + destination);
|
||||||
|
return destination;
|
||||||
} catch ( Exception e ) {
|
} catch ( Exception e ) {
|
||||||
// we catch everything, and no matter what eat the error
|
// we catch everything, and no matter what eat the error
|
||||||
exceptDuringRunReport("Couldn't read report file", e);
|
exceptDuringRunReport("Couldn't read report file", e);
|
||||||
file.delete();
|
destination.delete();
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -305,42 +296,46 @@ public class GATKRunReport {
|
||||||
private void postReportToAWSS3() {
|
private void postReportToAWSS3() {
|
||||||
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
|
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
|
||||||
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
|
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
|
||||||
File localFile = postReportToLocalDisk(new File("./"));
|
final String key = getKey();
|
||||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
logger.debug("Generating GATK report to AWS S3 with key " + key);
|
||||||
if ( localFile != null ) { // we succeeded in creating the local file
|
try {
|
||||||
localFile.deleteOnExit();
|
// create an byte output stream so we can capture the output as a byte[]
|
||||||
try {
|
final ByteArrayOutputStream byteStream = new ByteArrayOutputStream(8096);
|
||||||
// stop us from printing the annoying, and meaningless, mime types warning
|
final OutputStream outputStream = new GZIPOutputStream(byteStream);
|
||||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
postReportToStream(outputStream);
|
||||||
mimeTypeLogger.setLevel(Level.FATAL);
|
outputStream.close();
|
||||||
|
final byte[] report = byteStream.toByteArray();
|
||||||
|
|
||||||
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
|
// stop us from printing the annoying, and meaningless, mime types warning
|
||||||
// are stored in an AWSCredentials object:
|
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||||
|
mimeTypeLogger.setLevel(Level.FATAL);
|
||||||
|
|
||||||
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
|
// Your Amazon Web Services (AWS) login credentials are required to manage S3 accounts. These credentials
|
||||||
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
|
// are stored in an AWSCredentials object:
|
||||||
String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user
|
|
||||||
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
|
|
||||||
|
|
||||||
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
|
// IAM GATK user credentials -- only right is to PutObject into GATK_Run_Report bucket
|
||||||
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
|
String awsAccessKey = "AKIAJXU7VIHBPDW4TDSQ"; // GATK AWS user
|
||||||
S3Service s3Service = new RestS3Service(awsCredentials);
|
String awsSecretKey = "uQLTduhK6Gy8mbOycpoZIxr8ZoVj1SQaglTWjpYA"; // GATK AWS user
|
||||||
|
AWSCredentials awsCredentials = new AWSCredentials(awsAccessKey, awsSecretKey);
|
||||||
|
|
||||||
// Create an S3Object based on a file, with Content-Length set automatically and
|
// To communicate with S3, create a class that implements an S3Service. We will use the REST/HTTP
|
||||||
// Content-Type set based on the file's extension (using the Mimetypes utility class)
|
// implementation based on HttpClient, as this is the most robust implementation provided with JetS3t.
|
||||||
S3Object fileObject = new S3Object(localFile);
|
S3Service s3Service = new RestS3Service(awsCredentials);
|
||||||
//logger.info("Created S3Object" + fileObject);
|
|
||||||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
// Create an S3Object based on a file, with Content-Length set automatically and
|
||||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
// Content-Type set based on the file's extension (using the Mimetypes utility class)
|
||||||
logger.debug("Uploaded to AWS: " + s3Object);
|
S3Object fileObject = new S3Object(key, report);
|
||||||
logger.info("Uploaded run statistics report to AWS S3");
|
//logger.info("Created S3Object" + fileObject);
|
||||||
} catch ( S3ServiceException e ) {
|
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||||
exceptDuringRunReport("S3 exception occurred", e);
|
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||||
} catch ( NoSuchAlgorithmException e ) {
|
logger.debug("Uploaded to AWS: " + s3Object);
|
||||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
logger.info("Uploaded run statistics report to AWS S3");
|
||||||
} catch ( IOException e ) {
|
} catch ( S3ServiceException e ) {
|
||||||
exceptDuringRunReport("Couldn't read report file", e);
|
exceptDuringRunReport("S3 exception occurred", e);
|
||||||
}
|
} catch ( NoSuchAlgorithmException e ) {
|
||||||
|
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
exceptDuringRunReport("Couldn't read report file", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.LinkedHashSet;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.Queue;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -54,7 +51,8 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||||
|
|
||||||
int minStart = Integer.MAX_VALUE;
|
int minStart = Integer.MAX_VALUE;
|
||||||
final ArrayList<ActiveRegion> isActiveList = new ArrayList<ActiveRegion>();
|
final ArrayList<Double> isActiveList = new ArrayList<Double>();
|
||||||
|
GenomeLoc firstIsActiveStart = null;
|
||||||
|
|
||||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||||
ReferenceOrderedView referenceOrderedDataView = null;
|
ReferenceOrderedView referenceOrderedDataView = null;
|
||||||
|
|
@ -64,25 +62,26 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
referenceOrderedDataView = (RodLocusView)locusView;
|
referenceOrderedDataView = (RodLocusView)locusView;
|
||||||
|
|
||||||
// We keep processing while the next reference location is within the interval
|
// We keep processing while the next reference location is within the interval
|
||||||
|
GenomeLoc prevLoc = null;
|
||||||
while( locusView.hasNext() ) {
|
while( locusView.hasNext() ) {
|
||||||
final AlignmentContext locus = locusView.next();
|
final AlignmentContext locus = locusView.next();
|
||||||
GenomeLoc location = locus.getLocation();
|
GenomeLoc location = locus.getLocation();
|
||||||
|
if(prevLoc != null) {
|
||||||
|
for(int iii = prevLoc.getStart() + 1; iii < location.getStart(); iii++ ) {
|
||||||
|
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||||
|
if( initialIntervals.overlaps( fakeLoc ) ) {
|
||||||
|
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( null, null, null )
|
||||||
|
: ( walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ) );
|
||||||
|
isActiveList.add( isActiveProb );
|
||||||
|
if( firstIsActiveStart == null ) {
|
||||||
|
firstIsActiveStart = fakeLoc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||||
|
|
||||||
if ( locus.hasExtendedEventPileup() ) {
|
|
||||||
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
|
||||||
// associated with the current site), we need to update the location. The updated location still starts
|
|
||||||
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
|
||||||
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
|
||||||
|
|
||||||
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
|
||||||
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
|
||||||
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
|
|
||||||
// the next call will not do anything to the view:
|
|
||||||
referenceView.expandBoundsToAccomodateLoc(location);
|
|
||||||
}
|
|
||||||
|
|
||||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||||
|
|
@ -91,11 +90,15 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||||
|
|
||||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||||
if( initialIntervals.overlaps(location) ) {
|
if( initialIntervals.overlaps( location ) ) {
|
||||||
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
|
final double isActiveProb = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus )
|
||||||
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
|
: ( walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0 ) );
|
||||||
|
isActiveList.add( isActiveProb );
|
||||||
|
if( firstIsActiveStart == null ) {
|
||||||
|
firstIsActiveStart = location;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||||
for( final PileupElement p : locus.getBasePileup() ) {
|
for( final PileupElement p : locus.getBasePileup() ) {
|
||||||
final SAMRecord read = p.getRead();
|
final SAMRecord read = p.getRead();
|
||||||
|
|
@ -104,15 +107,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If this is the last pileup for this shard then need to first do a special walker.isActive() call
|
// If this is the last pileup for this shard calculate the minimum alignment start so that we know
|
||||||
// and then calculate the minimum alignment start so that we know which active regions in the work queue are now safe to process
|
// which active regions in the work queue are now safe to process
|
||||||
if( !locusView.hasNext() ) {
|
if( !locusView.hasNext() ) {
|
||||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
|
||||||
if( initialIntervals.overlaps(location) ) {
|
|
||||||
final boolean isActive = ( walker.presetActiveRegions == null ? walker.isActive( tracker, refContext, locus ) : walker.presetActiveRegions.overlaps(location) );
|
|
||||||
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser(), activeRegionExtension ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
for( final PileupElement p : locus.getBasePileup() ) {
|
for( final PileupElement p : locus.getBasePileup() ) {
|
||||||
final SAMRecord read = p.getRead();
|
final SAMRecord read = p.getRead();
|
||||||
if( !myReads.contains(read) ) {
|
if( !myReads.contains(read) ) {
|
||||||
|
|
@ -121,12 +118,13 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
|
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printProgress(dataProvider.getShard(),locus.getLocation());
|
prevLoc = location;
|
||||||
|
printProgress(dataProvider.getShard(), locus.getLocation());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Take the individual isActive calls and integrate them into contiguous active regions and
|
// Take the individual isActive calls and integrate them into contiguous active regions and
|
||||||
// add these blocks of work to the work queue
|
// add these blocks of work to the work queue
|
||||||
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
|
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList, firstIsActiveStart, activeRegionExtension );
|
||||||
logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||||
if( walker.activeRegionOutStream == null ) {
|
if( walker.activeRegionOutStream == null ) {
|
||||||
workQueue.addAll( activeRegions );
|
workQueue.addAll( activeRegions );
|
||||||
|
|
@ -137,14 +135,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
// Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||||
if( !workQueue.isEmpty() ) {
|
while( workQueue.peek() != null && (workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig())) ) {
|
||||||
while( workQueue.peek().getExtendedLoc().getStop() < minStart || !workQueue.peek().getExtendedLoc().getContig().equals(dataProvider.getLocus().getContig()) ) {
|
final ActiveRegion activeRegion = workQueue.remove();
|
||||||
final ActiveRegion activeRegion = workQueue.remove();
|
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -184,7 +179,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
}
|
}
|
||||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||||
activeRegion.add( (GATKSAMRecord) read );
|
otherRegionToTest.add( (GATKSAMRecord) read );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -218,30 +213,46 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||||
}
|
}
|
||||||
|
|
||||||
// integrate active regions into contiguous chunks with identical active status
|
// band-pass filter the list of isActive probabilities and turn into active regions
|
||||||
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<ActiveRegion> activeList ) {
|
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<Double> activeList, final GenomeLoc firstIsActiveStart, final int activeRegionExtension ) {
|
||||||
|
|
||||||
|
final double ACTIVE_PROB_THRESHOLD = 0.2;
|
||||||
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
||||||
if( activeList.size() == 0 ) {
|
if( activeList.size() == 0 ) {
|
||||||
return returnList;
|
return returnList;
|
||||||
} else if( activeList.size() == 1 ) {
|
} else if( activeList.size() == 1 ) {
|
||||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(activeList.get(0).getLocation().getContig(), activeList.get(0).getLocation().getStart(), activeList.get(0).getLocation().getStart()),
|
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart(), firstIsActiveStart.getStart()),
|
||||||
activeList.get(0).isActive, engine.getGenomeLocParser(), activeList.get(0).getExtension() ) );
|
activeList.get(0) > ACTIVE_PROB_THRESHOLD, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||||
return returnList;
|
return returnList;
|
||||||
} else {
|
} else {
|
||||||
ActiveRegion prevLocus = activeList.get(0);
|
final Double[] activeProbArray = activeList.toArray(new Double[activeList.size()]);
|
||||||
ActiveRegion startLocus = prevLocus;
|
final double[] filteredProbArray = new double[activeProbArray.length];
|
||||||
for( final ActiveRegion thisLocus : activeList ) {
|
final int FILTER_SIZE = 10;
|
||||||
if( prevLocus.isActive != thisLocus.isActive || !prevLocus.getLocation().contiguousP( thisLocus.getLocation() ) ) {
|
final int MAX_ACTIVE_REGION = 200;
|
||||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
|
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||||
prevLocus.isActive, engine.getGenomeLocParser(), startLocus.getExtension() ) );
|
double maxVal = 0;
|
||||||
startLocus = thisLocus;
|
for( int jjj = Math.max(0, iii-FILTER_SIZE); jjj < Math.min(activeList.size(), iii+FILTER_SIZE); jjj++ ) {
|
||||||
|
if( activeProbArray[jjj] > maxVal ) { maxVal = activeProbArray[jjj]; }
|
||||||
}
|
}
|
||||||
prevLocus = thisLocus;
|
filteredProbArray[iii] = maxVal;
|
||||||
}
|
}
|
||||||
// output the last region if necessary
|
|
||||||
if( startLocus != prevLocus ) {
|
boolean curStatus = filteredProbArray[0] > ACTIVE_PROB_THRESHOLD;
|
||||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
|
int curStart = 0;
|
||||||
prevLocus.isActive, engine.getGenomeLocParser(), startLocus.getExtension() ) );
|
for(int iii = 1; iii < filteredProbArray.length; iii++ ) {
|
||||||
|
final boolean thisStatus = filteredProbArray[iii] > ACTIVE_PROB_THRESHOLD;
|
||||||
|
if( curStatus != thisStatus || (iii-curStart) > MAX_ACTIVE_REGION ) {
|
||||||
|
returnList.add( new ActiveRegion(
|
||||||
|
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (iii-1)),
|
||||||
|
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||||
|
curStatus = thisStatus;
|
||||||
|
curStart = iii;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( curStart != filteredProbArray.length-1 ) {
|
||||||
|
returnList.add( new ActiveRegion(
|
||||||
|
engine.getGenomeLocParser().createGenomeLoc(firstIsActiveStart.getContig(), firstIsActiveStart.getStart() + curStart, firstIsActiveStart.getStart() + (filteredProbArray.length-1)),
|
||||||
|
curStatus, engine.getGenomeLocParser(), activeRegionExtension ) );
|
||||||
}
|
}
|
||||||
return returnList;
|
return returnList;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -73,8 +73,8 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine active status over the AlignmentContext
|
// Determine probability of active status over the AlignmentContext
|
||||||
public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||||
|
|
||||||
// Map over the ActiveRegion
|
// Map over the ActiveRegion
|
||||||
public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
|
public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
|
||||||
|
|
|
||||||
|
|
@ -212,7 +212,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
|
|
||||||
public class BAQedPileupElement extends PileupElement {
|
public class BAQedPileupElement extends PileupElement {
|
||||||
public BAQedPileupElement( final PileupElement PE ) {
|
public BAQedPileupElement( final PileupElement PE ) {
|
||||||
super(PE.getRead(), PE.getOffset(), PE.isDeletion());
|
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeInsertion(), PE.isSoftClipped());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -169,9 +169,11 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
private VariantAnnotatorEngine annotationEngine;
|
private VariantAnnotatorEngine annotationEngine;
|
||||||
|
|
||||||
// enable deletions in the pileup
|
// enable deletions in the pileup
|
||||||
|
@Override
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
// enable extended events for indels
|
// enable extended events for indels
|
||||||
|
@Override
|
||||||
public boolean generateExtendedEvents() {
|
public boolean generateExtendedEvents() {
|
||||||
return (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES);
|
return (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,7 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void throwError(ReferenceContext ref, String message) {
|
private final void throwError(ReferenceContext ref, String message) {
|
||||||
throw new StingException(String.format("Site %s failed: %s", ref, message));
|
throw new StingException(String.format("Site %s failed: %s", ref.getLocus(), message));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
@ -92,13 +92,13 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
||||||
contigName = locusContigName;
|
contigName = locusContigName;
|
||||||
ReferenceSequence refSeq = uncachedRef.getSequence(contigName);
|
ReferenceSequence refSeq = uncachedRef.getSequence(contigName);
|
||||||
contigStart = 1;
|
contigStart = 1;
|
||||||
contigEnd = contigStart + refSeq.length();
|
contigEnd = contigStart + refSeq.length() - 1;
|
||||||
uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases();
|
uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases();
|
||||||
logger.warn(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
|
logger.info(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
|
||||||
}
|
}
|
||||||
|
|
||||||
final byte refBase = ref.getBase();
|
final byte refBase = ref.getBase();
|
||||||
if (! ( BaseUtils.isRegularBase(refBase) || BaseUtils.isNBase(refBase) ) )
|
if (! ( BaseUtils.isRegularBase(refBase) || isExtendFastaBase(refBase) ) )
|
||||||
throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase));
|
throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase));
|
||||||
|
|
||||||
// check bases are equal
|
// check bases are equal
|
||||||
|
|
@ -114,6 +114,28 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final boolean isExtendFastaBase(final byte b) {
|
||||||
|
switch ( b ) {
|
||||||
|
case 'U':
|
||||||
|
case 'R':
|
||||||
|
case 'Y':
|
||||||
|
case 'K':
|
||||||
|
case 'M':
|
||||||
|
case 'S':
|
||||||
|
case 'W':
|
||||||
|
case 'B':
|
||||||
|
case 'D':
|
||||||
|
case 'H':
|
||||||
|
case 'V':
|
||||||
|
case 'N':
|
||||||
|
case 'X':
|
||||||
|
case '-':
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
|
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,22 +25,20 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
|
||||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
|
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
@ -557,7 +555,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
// Look for this sample in the all vcs of the comp ROD track.
|
// Look for this sample in the all vcs of the comp ROD track.
|
||||||
boolean foundVariant = false;
|
boolean foundVariant = false;
|
||||||
for (VariantContext compVC : compVCs) {
|
for (VariantContext compVC : compVCs) {
|
||||||
if (sampleHasVariant(compVC.getGenotype(g.getSampleName()))) {
|
if (haveSameGenotypes(g, compVC.getGenotype(g.getSampleName()))) {
|
||||||
foundVariant = true;
|
foundVariant = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ public class QualityUtils {
|
||||||
|
|
||||||
private static double qualToErrorProbCache[] = new double[256];
|
private static double qualToErrorProbCache[] = new double[256];
|
||||||
static {
|
static {
|
||||||
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i);
|
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -44,15 +44,15 @@ public class QualityUtils {
|
||||||
* Convert a quality score to a probability of error. This is the Phred-style
|
* Convert a quality score to a probability of error. This is the Phred-style
|
||||||
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
* conversion, *not* the Illumina-style conversion (though asymptotically, they're the same).
|
||||||
*
|
*
|
||||||
* @param qual a quality score (0-40)
|
* @param qual a quality score (0 - 255)
|
||||||
* @return a probability (0.0-1.0)
|
* @return a probability (0.0 - 1.0)
|
||||||
*/
|
*/
|
||||||
static public double qualToErrorProbRaw(byte qual) {
|
static private double qualToErrorProbRaw(int qual) {
|
||||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static public double qualToErrorProb(byte qual) {
|
static public double qualToErrorProb(byte qual) {
|
||||||
return qualToErrorProbCache[qual];
|
return qualToErrorProbCache[(int)qual & 0xff]; // Map: 127 -> 127; -128 -> 128; -1 -> 255; etc.
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -177,7 +177,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
||||||
for (int i = 0; i < reads.size(); i++) {
|
for (int i = 0; i < reads.size(); i++) {
|
||||||
GATKSAMRecord read = reads.get(i);
|
GATKSAMRecord read = reads.get(i);
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
|
pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
|
||||||
}
|
}
|
||||||
|
|
||||||
return pileup;
|
return pileup;
|
||||||
|
|
@ -196,7 +196,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
||||||
|
|
||||||
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
|
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
|
||||||
for (GATKSAMRecord read : reads) {
|
for (GATKSAMRecord read : reads) {
|
||||||
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
|
pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
|
||||||
}
|
}
|
||||||
|
|
||||||
return pileup;
|
return pileup;
|
||||||
|
|
@ -204,7 +204,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
||||||
|
|
||||||
protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
|
protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
|
||||||
|
|
||||||
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion);
|
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped);
|
||||||
|
|
||||||
// --------------------------------------------------------
|
// --------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,8 @@ import java.util.Arrays;
|
||||||
* Time: 2:57:55 PM
|
* Time: 2:57:55 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Extended events are slated for removal
|
||||||
public class ExtendedEventPileupElement extends PileupElement {
|
public class ExtendedEventPileupElement extends PileupElement {
|
||||||
public enum Type {
|
public enum Type {
|
||||||
NOEVENT, DELETION, INSERTION
|
NOEVENT, DELETION, INSERTION
|
||||||
|
|
@ -46,7 +48,7 @@ public class ExtendedEventPileupElement extends PileupElement {
|
||||||
|
|
||||||
|
|
||||||
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) {
|
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) {
|
||||||
super(read, offset, type == Type.DELETION);
|
super(read, offset, type == Type.DELETION, false, false); // extended events are slated for removal
|
||||||
this.read = read;
|
this.read = read;
|
||||||
this.offset = offset;
|
this.offset = offset;
|
||||||
this.eventLength = eventLength;
|
this.eventLength = eventLength;
|
||||||
|
|
|
||||||
|
|
@ -23,47 +23,46 @@ public class PileupElement implements Comparable<PileupElement> {
|
||||||
protected final GATKSAMRecord read;
|
protected final GATKSAMRecord read;
|
||||||
protected final int offset;
|
protected final int offset;
|
||||||
protected final boolean isDeletion;
|
protected final boolean isDeletion;
|
||||||
|
protected final boolean isBeforeInsertion;
|
||||||
|
protected final boolean isSoftClipped;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new pileup element.
|
* Creates a new pileup element.
|
||||||
*
|
*
|
||||||
* @param read the read we are adding to the pileup
|
* @param read the read we are adding to the pileup
|
||||||
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
||||||
* @param isDeletion whether or not this base is a deletion
|
* @param isDeletion whether or not this base is a deletion
|
||||||
|
* @param isBeforeInsertion whether or not this base is before an insertion
|
||||||
|
* @param isSoftClipped whether or not this base was softclipped
|
||||||
*/
|
*/
|
||||||
@Requires({
|
@Requires({
|
||||||
"read != null",
|
"read != null",
|
||||||
"offset >= -1",
|
"offset >= -1",
|
||||||
"offset <= read.getReadLength()"})
|
"offset <= read.getReadLength()"})
|
||||||
public PileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeInsertion, final boolean isSoftClipped) {
|
||||||
if (offset < 0 && isDeletion)
|
if (offset < 0 && isDeletion)
|
||||||
throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset");
|
throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset");
|
||||||
|
|
||||||
this.read = read;
|
this.read = read;
|
||||||
this.offset = offset;
|
this.offset = offset;
|
||||||
this.isDeletion = isDeletion;
|
this.isDeletion = isDeletion;
|
||||||
|
this.isBeforeInsertion = isBeforeInsertion;
|
||||||
|
this.isSoftClipped = isSoftClipped;
|
||||||
}
|
}
|
||||||
|
|
||||||
// /**
|
|
||||||
// * Creates a NON DELETION pileup element.
|
|
||||||
// *
|
|
||||||
// * use this constructor only for insertions and matches/mismatches.
|
|
||||||
// * @param read the read we are adding to the pileup
|
|
||||||
// * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
|
|
||||||
// */
|
|
||||||
// @Requires({
|
|
||||||
// "read != null",
|
|
||||||
// "offset >= -1",
|
|
||||||
// "offset <= read.getReadLength()"})
|
|
||||||
// public PileupElement( GATKSAMRecord read, int offset ) {
|
|
||||||
// this(read, offset, false);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
public boolean isDeletion() {
|
public boolean isDeletion() {
|
||||||
return isDeletion;
|
return isDeletion;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isBeforeInsertion() {
|
||||||
|
return isBeforeInsertion;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSoftClipped() {
|
||||||
|
return isSoftClipped;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isInsertionAtBeginningOfRead() {
|
public boolean isInsertionAtBeginningOfRead() {
|
||||||
return offset == -1;
|
return offset == -1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped) {
|
||||||
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
|
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
|
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped) {
|
||||||
return new PileupElement(read, offset, isDeletion);
|
return new PileupElement(read, offset, isDeletion, isBeforeInsertion, isSoftClipped);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -361,10 +361,10 @@ public class ArtificialSAMUtils {
|
||||||
final GATKSAMRecord left = pair.get(0);
|
final GATKSAMRecord left = pair.get(0);
|
||||||
final GATKSAMRecord right = pair.get(1);
|
final GATKSAMRecord right = pair.get(1);
|
||||||
|
|
||||||
pileupElements.add(new PileupElement(left, pos - leftStart, false));
|
pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false));
|
||||||
|
|
||||||
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
|
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
|
||||||
pileupElements.add(new PileupElement(right, pos - rightStart, false));
|
pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,10 @@ import java.util.Map;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class GATKSAMRecord extends BAMRecord {
|
public class GATKSAMRecord extends BAMRecord {
|
||||||
public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
|
// ReduceReads specific attribute tags
|
||||||
|
public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool
|
||||||
|
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OP"; // reads that are clipped may use this attribute to keep track of their original alignment start
|
||||||
|
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end
|
||||||
|
|
||||||
// the SAMRecord data we're caching
|
// the SAMRecord data we're caching
|
||||||
private String mReadString = null;
|
private String mReadString = null;
|
||||||
|
|
@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the original alignment start of a previously clipped read.
|
||||||
|
*
|
||||||
|
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||||
|
*
|
||||||
|
* @return the alignment start of a read before it was clipped
|
||||||
|
*/
|
||||||
|
public int getOriginalAlignmentStart() {
|
||||||
|
int originalAlignmentStart = getUnclippedStart();
|
||||||
|
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT);
|
||||||
|
if (alignmentShift != null)
|
||||||
|
originalAlignmentStart += alignmentShift;
|
||||||
|
return originalAlignmentStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the original alignment end of a previously clipped read.
|
||||||
|
*
|
||||||
|
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||||
|
*
|
||||||
|
* @return the alignment end of a read before it was clipped
|
||||||
|
*/
|
||||||
|
public int getOriginalAlignmentEnd() {
|
||||||
|
int originalAlignmentEnd = getUnclippedEnd();
|
||||||
|
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT);
|
||||||
|
if (alignmentShift != null)
|
||||||
|
originalAlignmentEnd -= alignmentShift;
|
||||||
|
return originalAlignmentEnd;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an empty GATKSAMRecord with the read's header, read group and mate
|
* Creates an empty GATKSAMRecord with the read's header, read group and mate
|
||||||
* information, but empty (not-null) fields:
|
* information, but empty (not-null) fields:
|
||||||
|
|
@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return emptyRead;
|
return emptyRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shallow copy of everything, except for the attribute list and the temporary attributes.
|
||||||
|
* A new list of the attributes is created for both, but the attributes themselves are copied by reference.
|
||||||
|
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
|
||||||
|
*
|
||||||
|
* @return a shallow copy of the GATKSAMRecord
|
||||||
|
* @throws CloneNotSupportedException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Object clone() throws CloneNotSupportedException {
|
||||||
|
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
|
||||||
|
if (temporaryAttributes != null) {
|
||||||
|
for (Object attribute : temporaryAttributes.keySet())
|
||||||
|
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
|
||||||
|
}
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER",
|
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
|
Arrays.asList("929bbb96381541c162dc7e5462e26ea2")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("testDiscordance--" + testFile, spec);
|
executeTest("testDiscordance--" + testFile, spec);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
|
||||||
|
public class GATKSAMRecordUnitTest extends BaseTest {
|
||||||
|
GATKSAMRecord read, reducedRead;
|
||||||
|
final static String BASES = "ACTG";
|
||||||
|
final static String QUALS = "!+5?";
|
||||||
|
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
|
||||||
|
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void init() {
|
||||||
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||||
|
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
||||||
|
read.setReadUnmappedFlag(true);
|
||||||
|
read.setReadBases(new String(BASES).getBytes());
|
||||||
|
read.setBaseQualityString(new String(QUALS));
|
||||||
|
|
||||||
|
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
||||||
|
reducedRead.setReadBases(BASES.getBytes());
|
||||||
|
reducedRead.setBaseQualityString(QUALS);
|
||||||
|
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReducedReads() {
|
||||||
|
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
|
||||||
|
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
|
||||||
|
|
||||||
|
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
|
||||||
|
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
||||||
|
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReducedReadPileupElement() {
|
||||||
|
PileupElement readp = new PileupElement(read, 0, false, false, false);
|
||||||
|
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false);
|
||||||
|
|
||||||
|
Assert.assertFalse(readp.getRead().isReducedRead());
|
||||||
|
|
||||||
|
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
|
||||||
|
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
||||||
|
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetOriginalAlignments() {
|
||||||
|
final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
|
||||||
|
final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 };
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
|
||||||
|
|
||||||
|
// A regular read with all matches
|
||||||
|
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
// Alignment start shifted
|
||||||
|
int alignmentShift = 2;
|
||||||
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift);
|
||||||
|
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
// Both alignments shifted
|
||||||
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift);
|
||||||
|
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
// Alignment end shifted
|
||||||
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null);
|
||||||
|
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,57 +1,11 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeTest;
|
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
|
||||||
public class ReadUtilsUnitTest extends BaseTest {
|
public class ReadUtilsUnitTest extends BaseTest {
|
||||||
GATKSAMRecord read, reducedRead;
|
|
||||||
final static String BASES = "ACTG";
|
|
||||||
final static String QUALS = "!+5?";
|
|
||||||
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
|
|
||||||
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
|
||||||
|
|
||||||
@BeforeTest
|
|
||||||
public void init() {
|
|
||||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
|
||||||
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
|
||||||
read.setReadUnmappedFlag(true);
|
|
||||||
read.setReadBases(new String(BASES).getBytes());
|
|
||||||
read.setBaseQualityString(new String(QUALS));
|
|
||||||
|
|
||||||
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
|
||||||
reducedRead.setReadBases(BASES.getBytes());
|
|
||||||
reducedRead.setBaseQualityString(QUALS);
|
|
||||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReducedReads() {
|
|
||||||
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
|
|
||||||
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
|
|
||||||
|
|
||||||
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
|
|
||||||
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
|
||||||
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReducedReadPileupElement() {
|
|
||||||
PileupElement readp = new PileupElement(read, 0, false);
|
|
||||||
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false);
|
|
||||||
|
|
||||||
Assert.assertFalse(readp.getRead().isReducedRead());
|
|
||||||
|
|
||||||
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
|
|
||||||
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
|
||||||
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetAdaptorBoundary() {
|
public void testGetAdaptorBoundary() {
|
||||||
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
||||||
|
|
|
||||||
|
|
@ -25,9 +25,6 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
|
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
|
||||||
var noIndels: Boolean = false
|
var noIndels: Boolean = false
|
||||||
|
|
||||||
@Argument(shortName="LOCAL_ET", doc="Doesn't use the AWS S3 storage for ET option", required=false)
|
|
||||||
var LOCAL_ET: Boolean = false
|
|
||||||
|
|
||||||
@Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false)
|
@Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false)
|
||||||
var minimumBaseQuality: Int = -1
|
var minimumBaseQuality: Int = -1
|
||||||
|
|
||||||
|
|
@ -203,7 +200,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||||
logging_level = "INFO";
|
logging_level = "INFO";
|
||||||
memoryLimit = 4;
|
memoryLimit = 4;
|
||||||
phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3
|
phone_home = GATKRunReport.PhoneHomeOption.NO_ET
|
||||||
}
|
}
|
||||||
|
|
||||||
def bai(bam: File) = new File(bam + ".bai")
|
def bai(bam: File) = new File(bam + ".bai")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue