Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-01-27 15:12:55 -05:00
commit f7ac1f4a69
7 changed files with 164 additions and 58 deletions

View File

@ -41,7 +41,7 @@
<dependency org="log4j" name="log4j" rev="1.2.15"/>
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
<dependency org="colt" name="colt" rev="1.2.0"/>
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
@ -66,7 +66,7 @@
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
<!-- Lucene core utilities -->
<dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/>
<!-- <dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/> -->
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>

View File

@ -83,7 +83,7 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
}
private final void throwError(ReferenceContext ref, String message) {
throw new StingException(String.format("Site %s failed: %s", ref, message));
throw new StingException(String.format("Site %s failed: %s", ref.getLocus(), message));
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -92,13 +92,13 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
contigName = locusContigName;
ReferenceSequence refSeq = uncachedRef.getSequence(contigName);
contigStart = 1;
contigEnd = contigStart + refSeq.length();
contigEnd = contigStart + refSeq.length() - 1;
uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases();
logger.warn(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
logger.info(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd));
}
final byte refBase = ref.getBase();
if (! ( BaseUtils.isRegularBase(refBase) || BaseUtils.isNBase(refBase) ) )
if (! ( BaseUtils.isRegularBase(refBase) || isExtendFastaBase(refBase) ) )
throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase));
// check bases are equal
@ -114,6 +114,28 @@ public class QCRefWalker extends RefWalker<Integer, Integer> {
return 1;
}
private static final boolean isExtendFastaBase(final byte b) {
switch ( b ) {
case 'U':
case 'R':
case 'Y':
case 'K':
case 'M':
case 'S':
case 'W':
case 'B':
case 'D':
case 'H':
case 'V':
case 'N':
case 'X':
case '-':
return true;
default:
return false;
}
}
public Integer reduceInit() {
return 0;
}

View File

@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate {
}
}
else {
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
}
}

View File

@ -43,7 +43,10 @@ import java.util.Map;
*
*/
public class GATKSAMRecord extends BAMRecord {
public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
// ReduceReads specific attribute tags
public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OP"; // reads that are clipped may use this attribute to keep track of their original alignment start
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end
// the SAMRecord data we're caching
private String mReadString = null;
@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord {
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Determines the original alignment start of a previously clipped read.
*
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
*
* @return the alignment start of a read before it was clipped
*/
public int getOriginalAlignmentStart() {
int originalAlignmentStart = getUnclippedStart();
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT);
if (alignmentShift != null)
originalAlignmentStart += alignmentShift;
return originalAlignmentStart;
}
/**
* Determines the original alignment end of a previously clipped read.
*
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
*
* @return the alignment end of a read before it was clipped
*/
public int getOriginalAlignmentEnd() {
int originalAlignmentEnd = getUnclippedEnd();
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT);
if (alignmentShift != null)
originalAlignmentEnd -= alignmentShift;
return originalAlignmentEnd;
}
/**
* Creates an empty GATKSAMRecord with the read's header, read group and mate
* information, but empty (not-null) fields:
@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord {
return emptyRead;
}
/**
* Shallow copy of everything, except for the attribute list and the temporary attributes.
* A new list of the attributes is created for both, but the attributes themselves are copied by reference.
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
*
* @return a shallow copy of the GATKSAMRecord
* @throws CloneNotSupportedException
*/
@Override
public Object clone() throws CloneNotSupportedException {
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
if (temporaryAttributes != null) {
for (Object attribute : temporaryAttributes.keySet())
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
}
return clone;
}
}

View File

@ -0,0 +1,83 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class GATKSAMRecordUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
@BeforeClass
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
@Test
public void testReducedReads() {
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
for (int i = 0; i < reducedRead.getReadLength(); i++) {
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
}
}
@Test
public void testReducedReadPileupElement() {
PileupElement readp = new PileupElement(read, 0, false, false, false);
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false);
Assert.assertFalse(readp.getRead().isReducedRead());
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
}
@Test
public void testGetOriginalAlignments() {
final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 };
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
// A regular read with all matches
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
// Alignment start shifted
int alignmentShift = 2;
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift);
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
// Both alignments shifted
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift);
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
// Alignment end shifted
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null);
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
}
}

View File

@ -1,57 +1,11 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
public class ReadUtilsUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
@BeforeTest
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
@Test
public void testReducedReads() {
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
for (int i = 0; i < reducedRead.getReadLength(); i++) {
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
}
}
@Test
public void testReducedReadPileupElement() {
PileupElement readp = new PileupElement(read, 0, false, false, false);
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false);
Assert.assertFalse(readp.getRead().isReducedRead());
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
}
@Test
public void testGetAdaptorBoundary() {
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};

View File

@ -25,9 +25,6 @@ class MethodsDevelopmentCallingPipeline extends QScript {
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
var noIndels: Boolean = false
@Argument(shortName="LOCAL_ET", doc="Doesn't use the AWS S3 storage for ET option", required=false)
var LOCAL_ET: Boolean = false
@Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false)
var minimumBaseQuality: Int = -1
@ -203,7 +200,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
logging_level = "INFO";
memoryLimit = 4;
phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3
phone_home = GATKRunReport.PhoneHomeOption.NO_ET
}
def bai(bam: File) = new File(bam + ".bai")