Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
f7ac1f4a69
4
ivy.xml
4
ivy.xml
|
|
@ -41,7 +41,7 @@
|
||||||
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
||||||
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
||||||
<dependency org="colt" name="colt" rev="1.2.0"/>
|
<dependency org="colt" name="colt" rev="1.2.0"/>
|
||||||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
|
||||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||||
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
||||||
|
|
||||||
|
|
@ -66,7 +66,7 @@
|
||||||
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
|
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
|
||||||
|
|
||||||
<!-- Lucene core utilities -->
|
<!-- Lucene core utilities -->
|
||||||
<dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/>
|
<!-- <dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/> -->
|
||||||
|
|
||||||
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
|
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
|
||||||
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,7 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void throwError(ReferenceContext ref, String message) {
|
private final void throwError(ReferenceContext ref, String message) {
|
||||||
throw new StingException(String.format("Site %s failed: %s", ref, message));
|
throw new StingException(String.format("Site %s failed: %s", ref.getLocus(), message));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
@ -92,13 +92,13 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
||||||
contigName = locusContigName;
|
contigName = locusContigName;
|
||||||
ReferenceSequence refSeq = uncachedRef.getSequence(contigName);
|
ReferenceSequence refSeq = uncachedRef.getSequence(contigName);
|
||||||
contigStart = 1;
|
contigStart = 1;
|
||||||
contigEnd = contigStart + refSeq.length();
|
contigEnd = contigStart + refSeq.length() - 1;
|
||||||
uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases();
|
uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases();
|
||||||
logger.warn(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
|
logger.info(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
|
||||||
}
|
}
|
||||||
|
|
||||||
final byte refBase = ref.getBase();
|
final byte refBase = ref.getBase();
|
||||||
if (! ( BaseUtils.isRegularBase(refBase) || BaseUtils.isNBase(refBase) ) )
|
if (! ( BaseUtils.isRegularBase(refBase) || isExtendFastaBase(refBase) ) )
|
||||||
throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase));
|
throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase));
|
||||||
|
|
||||||
// check bases are equal
|
// check bases are equal
|
||||||
|
|
@ -114,6 +114,28 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final boolean isExtendFastaBase(final byte b) {
|
||||||
|
switch ( b ) {
|
||||||
|
case 'U':
|
||||||
|
case 'R':
|
||||||
|
case 'Y':
|
||||||
|
case 'K':
|
||||||
|
case 'M':
|
||||||
|
case 'S':
|
||||||
|
case 'W':
|
||||||
|
case 'B':
|
||||||
|
case 'D':
|
||||||
|
case 'H':
|
||||||
|
case 'V':
|
||||||
|
case 'N':
|
||||||
|
case 'X':
|
||||||
|
case '-':
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
|
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,10 @@ import java.util.Map;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class GATKSAMRecord extends BAMRecord {
|
public class GATKSAMRecord extends BAMRecord {
|
||||||
public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
|
// ReduceReads specific attribute tags
|
||||||
|
public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool
|
||||||
|
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OP"; // reads that are clipped may use this attribute to keep track of their original alignment start
|
||||||
|
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end
|
||||||
|
|
||||||
// the SAMRecord data we're caching
|
// the SAMRecord data we're caching
|
||||||
private String mReadString = null;
|
private String mReadString = null;
|
||||||
|
|
@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the original alignment start of a previously clipped read.
|
||||||
|
*
|
||||||
|
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||||
|
*
|
||||||
|
* @return the alignment start of a read before it was clipped
|
||||||
|
*/
|
||||||
|
public int getOriginalAlignmentStart() {
|
||||||
|
int originalAlignmentStart = getUnclippedStart();
|
||||||
|
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT);
|
||||||
|
if (alignmentShift != null)
|
||||||
|
originalAlignmentStart += alignmentShift;
|
||||||
|
return originalAlignmentStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines the original alignment end of a previously clipped read.
|
||||||
|
*
|
||||||
|
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||||
|
*
|
||||||
|
* @return the alignment end of a read before it was clipped
|
||||||
|
*/
|
||||||
|
public int getOriginalAlignmentEnd() {
|
||||||
|
int originalAlignmentEnd = getUnclippedEnd();
|
||||||
|
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT);
|
||||||
|
if (alignmentShift != null)
|
||||||
|
originalAlignmentEnd -= alignmentShift;
|
||||||
|
return originalAlignmentEnd;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an empty GATKSAMRecord with the read's header, read group and mate
|
* Creates an empty GATKSAMRecord with the read's header, read group and mate
|
||||||
* information, but empty (not-null) fields:
|
* information, but empty (not-null) fields:
|
||||||
|
|
@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
return emptyRead;
|
return emptyRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shallow copy of everything, except for the attribute list and the temporary attributes.
|
||||||
|
* A new list of the attributes is created for both, but the attributes themselves are copied by reference.
|
||||||
|
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
|
||||||
|
*
|
||||||
|
* @return a shallow copy of the GATKSAMRecord
|
||||||
|
* @throws CloneNotSupportedException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Object clone() throws CloneNotSupportedException {
|
||||||
|
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
|
||||||
|
if (temporaryAttributes != null) {
|
||||||
|
for (Object attribute : temporaryAttributes.keySet())
|
||||||
|
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
|
||||||
|
}
|
||||||
|
return clone;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,83 @@
|
||||||
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
|
||||||
|
public class GATKSAMRecordUnitTest extends BaseTest {
|
||||||
|
GATKSAMRecord read, reducedRead;
|
||||||
|
final static String BASES = "ACTG";
|
||||||
|
final static String QUALS = "!+5?";
|
||||||
|
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
|
||||||
|
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public void init() {
|
||||||
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||||
|
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
||||||
|
read.setReadUnmappedFlag(true);
|
||||||
|
read.setReadBases(new String(BASES).getBytes());
|
||||||
|
read.setBaseQualityString(new String(QUALS));
|
||||||
|
|
||||||
|
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
||||||
|
reducedRead.setReadBases(BASES.getBytes());
|
||||||
|
reducedRead.setBaseQualityString(QUALS);
|
||||||
|
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReducedReads() {
|
||||||
|
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
|
||||||
|
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
|
||||||
|
|
||||||
|
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
|
||||||
|
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
||||||
|
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReducedReadPileupElement() {
|
||||||
|
PileupElement readp = new PileupElement(read, 0, false, false, false);
|
||||||
|
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false);
|
||||||
|
|
||||||
|
Assert.assertFalse(readp.getRead().isReducedRead());
|
||||||
|
|
||||||
|
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
|
||||||
|
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
||||||
|
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetOriginalAlignments() {
|
||||||
|
final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
|
||||||
|
final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 };
|
||||||
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
|
||||||
|
|
||||||
|
// A regular read with all matches
|
||||||
|
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
// Alignment start shifted
|
||||||
|
int alignmentShift = 2;
|
||||||
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift);
|
||||||
|
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
// Both alignments shifted
|
||||||
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift);
|
||||||
|
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
// Alignment end shifted
|
||||||
|
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null);
|
||||||
|
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
|
||||||
|
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,57 +1,11 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeTest;
|
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
|
||||||
public class ReadUtilsUnitTest extends BaseTest {
|
public class ReadUtilsUnitTest extends BaseTest {
|
||||||
GATKSAMRecord read, reducedRead;
|
|
||||||
final static String BASES = "ACTG";
|
|
||||||
final static String QUALS = "!+5?";
|
|
||||||
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
|
|
||||||
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
|
||||||
|
|
||||||
@BeforeTest
|
|
||||||
public void init() {
|
|
||||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
|
||||||
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
|
||||||
read.setReadUnmappedFlag(true);
|
|
||||||
read.setReadBases(new String(BASES).getBytes());
|
|
||||||
read.setBaseQualityString(new String(QUALS));
|
|
||||||
|
|
||||||
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
|
||||||
reducedRead.setReadBases(BASES.getBytes());
|
|
||||||
reducedRead.setBaseQualityString(QUALS);
|
|
||||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReducedReads() {
|
|
||||||
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
|
|
||||||
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
|
|
||||||
|
|
||||||
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
|
|
||||||
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
|
||||||
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReducedReadPileupElement() {
|
|
||||||
PileupElement readp = new PileupElement(read, 0, false, false, false);
|
|
||||||
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false);
|
|
||||||
|
|
||||||
Assert.assertFalse(readp.getRead().isReducedRead());
|
|
||||||
|
|
||||||
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
|
|
||||||
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
|
||||||
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetAdaptorBoundary() {
|
public void testGetAdaptorBoundary() {
|
||||||
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};
|
||||||
|
|
|
||||||
|
|
@ -25,9 +25,6 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
|
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
|
||||||
var noIndels: Boolean = false
|
var noIndels: Boolean = false
|
||||||
|
|
||||||
@Argument(shortName="LOCAL_ET", doc="Doesn't use the AWS S3 storage for ET option", required=false)
|
|
||||||
var LOCAL_ET: Boolean = false
|
|
||||||
|
|
||||||
@Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false)
|
@Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false)
|
||||||
var minimumBaseQuality: Int = -1
|
var minimumBaseQuality: Int = -1
|
||||||
|
|
||||||
|
|
@ -203,7 +200,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||||
logging_level = "INFO";
|
logging_level = "INFO";
|
||||||
memoryLimit = 4;
|
memoryLimit = 4;
|
||||||
phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3
|
phone_home = GATKRunReport.PhoneHomeOption.NO_ET
|
||||||
}
|
}
|
||||||
|
|
||||||
def bai(bam: File) = new File(bam + ".bai")
|
def bai(bam: File) = new File(bam + ".bai")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue