diff --git a/ivy.xml b/ivy.xml index f5ff15c30..f7c64aec6 100644 --- a/ivy.xml +++ b/ivy.xml @@ -41,7 +41,7 @@ - + @@ -66,7 +66,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java index bddf27d84..ab5324e39 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java @@ -83,7 +83,7 @@ public class QCRefWalker extends RefWalker { } private final void throwError(ReferenceContext ref, String message) { - throw new StingException(String.format("Site %s failed: %s", ref, message)); + throw new StingException(String.format("Site %s failed: %s", ref.getLocus(), message)); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -92,13 +92,13 @@ public class QCRefWalker extends RefWalker { contigName = locusContigName; ReferenceSequence refSeq = uncachedRef.getSequence(contigName); contigStart = 1; - contigEnd = contigStart + refSeq.length(); + contigEnd = contigStart + refSeq.length() - 1; uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases(); - logger.warn(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd)); + logger.info(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd)); } final byte refBase = ref.getBase(); - if (! ( BaseUtils.isRegularBase(refBase) || BaseUtils.isNBase(refBase) ) ) + if (! ( BaseUtils.isRegularBase(refBase) || isExtendFastaBase(refBase) ) ) throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase)); // check bases are equal @@ -114,6 +114,28 @@ public class QCRefWalker extends RefWalker { return 1; } + private static final boolean isExtendFastaBase(final byte b) { + switch ( b ) { + case 'U': + case 'R': + case 'Y': + case 'K': + case 'M': + case 'S': + case 'W': + case 'B': + case 'D': + case 'H': + case 'V': + case 'N': + case 'X': + case '-': + return true; + default: + return false; + } + } + public Integer reduceInit() { return 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 6b4fec04e..b0819ee69 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate { } } else { - throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform()); + throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid"); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 913548ecc..03b794ae3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -43,7 +43,10 @@ import java.util.Map; * */ public class GATKSAMRecord extends BAMRecord { - public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; + // ReduceReads specific attribute tags + public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool + public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OP"; // reads that are clipped may use this attribute to keep track of their original alignment start + public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end // the SAMRecord data we're caching private String mReadString = null; @@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + /** + * Determines the original alignment start of a previously clipped read. + * + * This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end + * + * @return the alignment start of a read before it was clipped + */ + public int getOriginalAlignmentStart() { + int originalAlignmentStart = getUnclippedStart(); + Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT); + if (alignmentShift != null) + originalAlignmentStart += alignmentShift; + return originalAlignmentStart; + } + + /** + * Determines the original alignment end of a previously clipped read. + * + * This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end + * + * @return the alignment end of a read before it was clipped + */ + public int getOriginalAlignmentEnd() { + int originalAlignmentEnd = getUnclippedEnd(); + Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT); + if (alignmentShift != null) + originalAlignmentEnd -= alignmentShift; + return originalAlignmentEnd; + } + /** * Creates an empty GATKSAMRecord with the read's header, read group and mate * information, but empty (not-null) fields: @@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord { return emptyRead; } + /** + * Shallow copy of everything, except for the attribute list and the temporary attributes. + * A new list of the attributes is created for both, but the attributes themselves are copied by reference. + * This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway. + * + * @return a shallow copy of the GATKSAMRecord + * @throws CloneNotSupportedException + */ + @Override + public Object clone() throws CloneNotSupportedException { + final GATKSAMRecord clone = (GATKSAMRecord) super.clone(); + if (temporaryAttributes != null) { + for (Object attribute : temporaryAttributes.keySet()) + clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute)); + } + return clone; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java new file mode 100755 index 000000000..729503f84 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -0,0 +1,83 @@ +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +public class GATKSAMRecordUnitTest extends BaseTest { + GATKSAMRecord read, reducedRead; + final static String BASES = "ACTG"; + final static String QUALS = "!+5?"; + final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1}; + final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets + + @BeforeClass + public void init() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); + read.setReadUnmappedFlag(true); + read.setReadBases(new String(BASES).getBytes()); + read.setBaseQualityString(new String(QUALS)); + + reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); + reducedRead.setReadBases(BASES.getBytes()); + reducedRead.setBaseQualityString(QUALS); + reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); + } + + @Test + public void testReducedReads() { + Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); + Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); + + Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); + for (int i = 0; i < reducedRead.getReadLength(); i++) { + Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); + } + } + + @Test + public void testReducedReadPileupElement() { + PileupElement readp = new PileupElement(read, 0, false, false, false); + PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false); + + Assert.assertFalse(readp.getRead().isReducedRead()); + + Assert.assertTrue(reducedreadp.getRead().isReducedRead()); + Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]); + Assert.assertEquals(reducedreadp.getQual(), readp.getQual()); + } + + @Test + public void testGetOriginalAlignments() { + final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 }; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M"); + + // A regular read with all matches + Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd()); + + // Alignment start shifted + int alignmentShift = 2; + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift); + Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd()); + + // Both alignments shifted + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift); + Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); + + // Alignment end shifted + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null); + Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); + + } + +} diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index b7a22ca1a..7598f62a6 100755 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -1,57 +1,11 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.testng.Assert; -import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class ReadUtilsUnitTest extends BaseTest { - GATKSAMRecord read, reducedRead; - final static String BASES = "ACTG"; - final static String QUALS = "!+5?"; - final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1}; - final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets - - @BeforeTest - public void init() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); - read.setReadUnmappedFlag(true); - read.setReadBases(new String(BASES).getBytes()); - read.setBaseQualityString(new String(QUALS)); - - reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); - reducedRead.setReadBases(BASES.getBytes()); - reducedRead.setBaseQualityString(QUALS); - reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); - } - - @Test - public void testReducedReads() { - Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); - Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); - - Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); - for (int i = 0; i < reducedRead.getReadLength(); i++) { - Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); - } - } - - @Test - public void testReducedReadPileupElement() { - PileupElement readp = new PileupElement(read, 0, false, false, false); - PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false); - - Assert.assertFalse(readp.getRead().isReducedRead()); - - Assert.assertTrue(reducedreadp.getRead().isReducedRead()); - Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]); - Assert.assertEquals(reducedreadp.getQual(), readp.getQual()); - } - @Test public void testGetAdaptorBoundary() { final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'}; diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala index 2f0715ae9..b860358ca 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala @@ -25,9 +25,6 @@ class MethodsDevelopmentCallingPipeline extends QScript { @Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false) var noIndels: Boolean = false - @Argument(shortName="LOCAL_ET", doc="Doesn't use the AWS S3 storage for ET option", required=false) - var LOCAL_ET: Boolean = false - @Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false) var minimumBaseQuality: Int = -1 @@ -203,7 +200,7 @@ class MethodsDevelopmentCallingPipeline extends QScript { trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; memoryLimit = 4; - phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3 + phone_home = GATKRunReport.PhoneHomeOption.NO_ET } def bai(bam: File) = new File(bam + ".bai")