From 07f72516ae299a61014b3a1fe8a1ba6c34140687 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 26 Jan 2012 16:14:25 -0500 Subject: [PATCH 01/10] Unsupported platform should be a user error --- .../sting/gatk/walkers/recalibration/CycleCovariate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 6b4fec04e..b0819ee69 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate { } } else { - throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform()); + throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid"); } } From 0d4027104f2d511aacf8bc01dea00d0c5f0fb2ec Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 26 Jan 2012 16:07:29 -0500 Subject: [PATCH 02/10] Reduced reads are now aware of their original alignments * Added annotations for reads that had been soft clipped prior to being reduced so that we can later recuperate their original alignments (start and end). * Tags keep the alignment shifts, not real alignment, for better compression * Tags are defined in the GATKSAMRecord * GATKSAMRecord has new functionality to retrieve original alignment start of all reads (trimmed or not) -- getOriginalAlignmentStart() and getOriginalAligmentEnd() * Updated ReduceReads MD5s accordingly --- .../sting/utils/sam/GATKSAMRecord.java | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 913548ecc..f17772f40 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -43,7 +43,10 @@ import java.util.Map; * */ public class GATKSAMRecord extends BAMRecord { - public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; + // ReduceReads specific attribute tags + public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool + public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OS"; // reads that are clipped may use this attribute to keep track of their original alignment start + public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end // the SAMRecord data we're caching private String mReadString = null; @@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + /** + * Determines the original alignment start of a previously clipped read. + * + * This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end + * + * @return the alignment start of a read before it was clipped + */ + public int getOriginalAlignmentStart() { + int originalAlignmentStart = getUnclippedStart(); + Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT); + if (alignmentShift != null) + originalAlignmentStart += alignmentShift; + return originalAlignmentStart; + } + + /** + * Determines the original alignment end of a previously clipped read. + * + * This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end + * + * @return the alignment end of a read before it was clipped + */ + public int getOriginalAlignmentEnd() { + int originalAlignmentEnd = getUnclippedEnd(); + Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT); + if (alignmentShift != null) + originalAlignmentEnd -= alignmentShift; + return originalAlignmentEnd; + } + /** * Creates an empty GATKSAMRecord with the read's header, read group and mate * information, but empty (not-null) fields: @@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord { return emptyRead; } + /** + * Shallow copy of everything, except for the attribute list and the temporary attributes. + * A new list of the attributes is created for both, but the attributes themselves are copied by reference. + * This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway. + * + * @return a shallow copy of the GATKSAMRecord + * @throws CloneNotSupportedException + */ + @Override + public Object clone() throws CloneNotSupportedException { + final GATKSAMRecord clone = (GATKSAMRecord) super.clone(); + if (temporaryAttributes != null) { + for (Object attribute : temporaryAttributes.keySet()) + clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute)); + } + return clone; + } } From 246e085ec9773320843f50da47066cd8313f659c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 26 Jan 2012 16:59:08 -0500 Subject: [PATCH 03/10] Unit tests for GATKSAMRecord class * new unit tests for the alignment shift properties of reduce reads * moved unit tests from ReadUtils that were actually testing GATKSAMRecord, not any of the ReadUtils to it. * cleaned up ReadUtilsUnitTest --- .../utils/sam/GATKSAMRecordUnitTest.java | 83 +++++++++++++++++++ .../sting/utils/sam/ReadUtilsUnitTest.java | 46 ---------- 2 files changed, 83 insertions(+), 46 deletions(-) create mode 100755 public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java new file mode 100755 index 000000000..317b320d3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -0,0 +1,83 @@ +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +public class GATKSAMRecordUnitTest extends BaseTest { + GATKSAMRecord read, reducedRead; + final static String BASES = "ACTG"; + final static String QUALS = "!+5?"; + final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1}; + final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets + + @BeforeClass + public void init() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); + read.setReadUnmappedFlag(true); + read.setReadBases(new String(BASES).getBytes()); + read.setBaseQualityString(new String(QUALS)); + + reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); + reducedRead.setReadBases(BASES.getBytes()); + reducedRead.setBaseQualityString(QUALS); + reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); + } + + @Test + public void testReducedReads() { + Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); + Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); + + Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); + for (int i = 0; i < reducedRead.getReadLength(); i++) { + Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); + } + } + + @Test + public void testReducedReadPileupElement() { + PileupElement readp = new PileupElement(read, 0, false); + PileupElement reducedreadp = new PileupElement(reducedRead, 0, false); + + Assert.assertFalse(readp.getRead().isReducedRead()); + + Assert.assertTrue(reducedreadp.getRead().isReducedRead()); + Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]); + Assert.assertEquals(reducedreadp.getQual(), readp.getQual()); + } + + @Test + public void testGetOriginalAlignments() { + final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 }; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M"); + + // A regular read with all matches + Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd()); + + // Alignment start shifted + int alignmentShift = 2; + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift); + Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd()); + + // Both alignments shifted + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift); + Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); + + // Alignment end shifted + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null); + Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); + + } + +} diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index b7a22ca1a..7598f62a6 100755 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -1,57 +1,11 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.testng.Assert; -import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class ReadUtilsUnitTest extends BaseTest { - GATKSAMRecord read, reducedRead; - final static String BASES = "ACTG"; - final static String QUALS = "!+5?"; - final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1}; - final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets - - @BeforeTest - public void init() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); - read.setReadUnmappedFlag(true); - read.setReadBases(new String(BASES).getBytes()); - read.setBaseQualityString(new String(QUALS)); - - reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); - reducedRead.setReadBases(BASES.getBytes()); - reducedRead.setBaseQualityString(QUALS); - reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); - } - - @Test - public void testReducedReads() { - Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); - Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); - - Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); - for (int i = 0; i < reducedRead.getReadLength(); i++) { - Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); - } - } - - @Test - public void testReducedReadPileupElement() { - PileupElement readp = new PileupElement(read, 0, false, false, false); - PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false); - - Assert.assertFalse(readp.getRead().isReducedRead()); - - Assert.assertTrue(reducedreadp.getRead().isReducedRead()); - Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]); - Assert.assertEquals(reducedreadp.getQual(), readp.getQual()); - } - @Test public void testGetAdaptorBoundary() { final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'}; From 2a565ebf90ea53e0a252fe097c778365aff3714a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 26 Jan 2012 19:58:42 -0500 Subject: [PATCH 05/10] embarrassing fix-up, thanks Khalid. --- .../broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java index 317b320d3..729503f84 100755 --- a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -42,8 +42,8 @@ public class GATKSAMRecordUnitTest extends BaseTest { @Test public void testReducedReadPileupElement() { - PileupElement readp = new PileupElement(read, 0, false); - PileupElement reducedreadp = new PileupElement(reducedRead, 0, false); + PileupElement readp = new PileupElement(read, 0, false, false, false); + PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false); Assert.assertFalse(readp.getRead().isReducedRead()); From cb04c0bf1136b351c703e9f3486527301f086067 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 27 Jan 2012 08:20:45 -0500 Subject: [PATCH 06/10] Removing javaassist 3.7, lucene library dependancies --- ivy.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ivy.xml b/ivy.xml index f5ff15c30..f7c64aec6 100644 --- a/ivy.xml +++ b/ivy.xml @@ -41,7 +41,7 @@ - + @@ -66,7 +66,7 @@ - + From 13d1626f51878d3973d43dd27a9a78e10119878d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 27 Jan 2012 08:24:17 -0500 Subject: [PATCH 07/10] Minor improvements in ref QC walker. Unfortunately this doesn't actually catch Chris's error --- .../sting/gatk/walkers/qc/QCRefWalker.java | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java index bddf27d84..ab5324e39 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/QCRefWalker.java @@ -83,7 +83,7 @@ public class QCRefWalker extends RefWalker { } private final void throwError(ReferenceContext ref, String message) { - throw new StingException(String.format("Site %s failed: %s", ref, message)); + throw new StingException(String.format("Site %s failed: %s", ref.getLocus(), message)); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -92,13 +92,13 @@ public class QCRefWalker extends RefWalker { contigName = locusContigName; ReferenceSequence refSeq = uncachedRef.getSequence(contigName); contigStart = 1; - contigEnd = contigStart + refSeq.length(); + contigEnd = contigStart + refSeq.length() - 1; uncachedBases = uncachedRef.getSubsequenceAt(contigName, contigStart, contigEnd).getBases(); - logger.warn(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd)); + logger.info(String.format("Loading contig %s (%d-%d)", contigName, contigStart, contigEnd)); } final byte refBase = ref.getBase(); - if (! ( BaseUtils.isRegularBase(refBase) || BaseUtils.isNBase(refBase) ) ) + if (! ( BaseUtils.isRegularBase(refBase) || isExtendFastaBase(refBase) ) ) throwError(ref, String.format("Refbase isn't a regular base (%d %c)", refBase, (char)refBase)); // check bases are equal @@ -114,6 +114,28 @@ public class QCRefWalker extends RefWalker { return 1; } + private static final boolean isExtendFastaBase(final byte b) { + switch ( b ) { + case 'U': + case 'R': + case 'Y': + case 'K': + case 'M': + case 'S': + case 'W': + case 'B': + case 'D': + case 'H': + case 'V': + case 'N': + case 'X': + case '-': + return true; + default: + return false; + } + } + public Integer reduceInit() { return 0; } From ec9920b04f7a95918ed3b76a4e971143ed8084b0 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 27 Jan 2012 08:51:39 -0500 Subject: [PATCH 08/10] Updating the SAM TAG for Original Alignment Start to "OP" per Mark's recommendation to reuse the Indel Realigner tag that made it to the SAM spec. The Alignment end tag is still "OE" as there is no official tag to reuse. --- .../src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index f17772f40..03b794ae3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -45,7 +45,7 @@ import java.util.Map; public class GATKSAMRecord extends BAMRecord { // ReduceReads specific attribute tags public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool - public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OS"; // reads that are clipped may use this attribute to keep track of their original alignment start + public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OP"; // reads that are clipped may use this attribute to keep track of their original alignment start public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end // the SAMRecord data we're caching From 052a4bdb9cfd5f182e0a4e779f357ea63ea4449f Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 27 Jan 2012 11:13:30 -0500 Subject: [PATCH 10/10] Turning off PHONE HOME option in the MDCP * MDCP is for internal use and there is no need to report to the Amazon cloud. * Reporting to ASW_S3 is not allowing jobs to finish, this is probably a bug. --- .../queue/qscripts/MethodsDevelopmentCallingPipeline.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala index 2f0715ae9..b860358ca 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala @@ -25,9 +25,6 @@ class MethodsDevelopmentCallingPipeline extends QScript { @Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false) var noIndels: Boolean = false - @Argument(shortName="LOCAL_ET", doc="Doesn't use the AWS S3 storage for ET option", required=false) - var LOCAL_ET: Boolean = false - @Argument(shortName="mbq", doc="The minimum Phred-Scaled quality score threshold to be considered a good base.", required=false) var minimumBaseQuality: Int = -1 @@ -203,7 +200,7 @@ class MethodsDevelopmentCallingPipeline extends QScript { trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; memoryLimit = 4; - phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3 + phone_home = GATKRunReport.PhoneHomeOption.NO_ET } def bai(bam: File) = new File(bam + ".bai")