From df3e4e1abd41c47ac95a0197b41761255d6fa8c4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Oct 2011 11:22:35 -0400 Subject: [PATCH 01/34] First working code to use SamRecordFactory to produce objects of our own design in SAMFileReader --- .../gatk/datasources/reads/SAMDataSource.java | 4 + .../sting/utils/sam/GATKSamRecordFactory.java | 74 +++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 74d39ecb0..96d8d7e1a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQSamIterator; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import java.io.File; import java.lang.reflect.InvocationTargetException; @@ -57,6 +58,8 @@ import java.util.*; * Converts shards to SAM iterators over the specified region */ public class SAMDataSource { + final private static GATKSamRecordFactory factory = new GATKSamRecordFactory(); + /** Backing support for reads. */ protected final ReadProperties readProperties; @@ -756,6 +759,7 @@ public class SAMDataSource { public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { for(SAMReaderID readerID: readerIDs) { SAMFileReader reader = new SAMFileReader(readerID.samFile); + reader.setSAMRecordFactory(factory); reader.enableFileSource(true); reader.enableIndexMemoryMapping(false); if(!enableLowMemorySharding) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java new file mode 100644 index 000000000..7c98a68e6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordFactory; +import net.sf.samtools.BAMRecord; +import org.broadinstitute.sting.utils.exceptions.UserException; + +/** + * Factory interface which allows plugging in of different classes for generating instances of + * SAMRecord and BAMRecord when reading from SAM/BAM files. + * + * @author Tim Fennell + */ +public class GATKSamRecordFactory implements SAMRecordFactory { + + /** Create a new SAMRecord to be filled in */ + public SAMRecord createSAMRecord(SAMFileHeader header) { + throw new UserException.BadInput("The GATK now longer supports input SAM files"); + } + + /** Create a new BAM Record. */ + public BAMRecord createBAMRecord(final SAMFileHeader header, + final int referenceSequenceIndex, + final int alignmentStart, + final short readNameLength, + final short mappingQuality, + final int indexingBin, + final int cigarLen, + final int flags, + final int readLen, + final int mateReferenceSequenceIndex, + final int mateAlignmentStart, + final int insertSize, + final byte[] variableLengthBlock) { + return new BAMRecord(header, + referenceSequenceIndex, + alignmentStart, + readNameLength, + mappingQuality, + indexingBin, + cigarLen, + flags, + readLen, + mateReferenceSequenceIndex, + mateAlignmentStart, + insertSize, + variableLengthBlock); + } +} From 7928b287fcb76d80bcc1e6889f0474f782d2fe08 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Oct 2011 13:15:27 -0400 Subject: [PATCH 02/34] GATKSamRecord now produced by SAMFileReaders by default -- Removed all of the unnecessary caching operations in GATKSAMRecord -- GATKSAMRecord renamed to GATKSamRecord for consistency --- .../gatk/datasources/reads/SAMDataSource.java | 4 +- .../iterators/ReadFormattingIterator.java | 26 +- ...elGenotypeLikelihoodsCalculationModel.java | 6 +- .../recalibration/CountCovariatesWalker.java | 6 +- .../recalibration/RecalDataManager.java | 8 +- .../TableRecalibrationWalker.java | 4 +- .../sting/utils/sam/GATKSAMRecord.java | 368 +----------------- .../sting/utils/sam/GATKSamRecordFactory.java | 26 +- .../sting/utils/sam/ReadUtils.java | 18 +- 9 files changed, 81 insertions(+), 385 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 96d8d7e1a..8452aadfd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -647,7 +647,9 @@ public class SAMDataSource { BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, byte defaultBaseQualities) { - wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); + if ( useOriginalBaseQualities || defaultBaseQualities >= 0 ) + // only wrap if we are replacing the original qualitiies or using a default base quality + wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 2f30d12a8..9a89d2086 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * An iterator which does post-processing of a read, including potentially wrapping @@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator { * no next exists. */ public SAMRecord next() { - return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities); + SAMRecord rec = wrappedIterator.next(); + + // if we are using default quals, check if we need them, and add if necessary. + // 1. we need if reads are lacking or have incomplete quality scores + // 2. we add if defaultBaseQualities has a positive value + if (defaultBaseQualities >= 0) { + byte reads [] = rec.getReadBases(); + byte quals [] = rec.getBaseQualities(); + if (quals == null || quals.length < reads.length) { + byte new_quals [] = new byte [reads.length]; + for (int i=0; i attributes = record.getAttributes(); - for ( SAMTagAndValue attribute : attributes ) - setAttribute(attribute.tag, attribute.value); - - // if we are using default quals, check if we need them, and add if necessary. - // 1. we need if reads are lacking or have incomplete quality scores - // 2. we add if defaultBaseQualities has a positive value - if (defaultBaseQualities >= 0) { - byte reads [] = record.getReadBases(); - byte quals [] = record.getBaseQualities(); - if (quals == null || quals.length < reads.length) { - byte new_quals [] = new byte [reads.length]; - for (int i=0; i getAttributes() { return mRecord.getAttributes(); } - - public SAMFileHeader getHeader() { return mRecord.getHeader(); } - - public void setHeader(SAMFileHeader samFileHeader) { mRecord.setHeader(samFileHeader); } - - public byte[] getVariableBinaryRepresentation() { return mRecord.getVariableBinaryRepresentation(); } - - public int getAttributesBinarySize() { return mRecord.getAttributesBinarySize(); } - - public String format() { return mRecord.format(); } - - public List getAlignmentBlocks() { return mRecord.getAlignmentBlocks(); } - - public List validateCigar(long l) { return mRecord.validateCigar(l); } - @Override public boolean equals(Object o) { if (this == o) return true; // note -- this forbids a GATKSAMRecord being equal to its underlying SAMRecord - if (!(o instanceof GATKSAMRecord)) return false; + if (!(o instanceof GATKSamRecord)) return false; // note that we do not consider the GATKSAMRecord internal state at all - return mRecord.equals(((GATKSAMRecord)o).mRecord); - } - - public int hashCode() { return mRecord.hashCode(); } - - public List isValid() { return mRecord.isValid(); } - - public Object clone() throws CloneNotSupportedException { return mRecord.clone(); } - - public String toString() { return mRecord.toString(); } - - public SAMFileSource getFileSource() { return mRecord.getFileSource(); } - - /** - * Sets a marker providing the source reader for this file and the position in the file from which the read originated. - * @param fileSource source of the given file. - */ - @Override - protected void setFileSource(final SAMFileSource fileSource) { - try { - Method method = SAMRecord.class.getDeclaredMethod("setFileSource",SAMFileSource.class); - method.setAccessible(true); - method.invoke(mRecord,fileSource); - } - catch(Exception ex) { - throw new ReviewedStingException("Unable to invoke setFileSource method",ex); - } + return super.equals(o); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java index 7c98a68e6..9076f06ae 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java @@ -57,18 +57,18 @@ public class GATKSamRecordFactory implements SAMRecordFactory { final int mateAlignmentStart, final int insertSize, final byte[] variableLengthBlock) { - return new BAMRecord(header, - referenceSequenceIndex, - alignmentStart, - readNameLength, - mappingQuality, - indexingBin, - cigarLen, - flags, - readLen, - mateReferenceSequenceIndex, - mateAlignmentStart, - insertSize, - variableLengthBlock); + return new GATKSamRecord(header, + referenceSequenceIndex, + alignmentStart, + readNameLength, + mappingQuality, + indexingBin, + cigarLen, + flags, + readLen, + mateReferenceSequenceIndex, + mateAlignmentStart, + insertSize, + variableLengthBlock); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index d1e9a236f..f91d77244 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -274,10 +274,10 @@ public class ReadUtils { * @param adaptorLength length of adaptor sequence * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped */ - public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) { + public static GATKSamRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) { Pair adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength); - GATKSAMRecord result = (GATKSAMRecord)rec; + GATKSamRecord result = (GATKSamRecord)rec; if ( adaptorBoundaries != null ) { if ( rec.getReadNegativeStrandFlag() && adaptorBoundaries.second >= rec.getAlignmentStart() && adaptorBoundaries.first < rec.getAlignmentEnd() ) @@ -290,7 +290,7 @@ public class ReadUtils { } // return true if the read needs to be completely clipped - private static GATKSAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { + private static GATKSamRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { if ( stopPosition >= oldRec.getAlignmentEnd() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it @@ -298,9 +298,9 @@ public class ReadUtils { return null; } - GATKSAMRecord rec; + GATKSamRecord rec; try { - rec = (GATKSAMRecord)oldRec.clone(); + rec = (GATKSamRecord)oldRec.clone(); } catch (Exception e) { return null; } @@ -370,7 +370,7 @@ public class ReadUtils { return rec; } - private static GATKSAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { + private static GATKSamRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { if ( startPosition <= oldRec.getAlignmentStart() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it @@ -378,9 +378,9 @@ public class ReadUtils { return null; } - GATKSAMRecord rec; + GATKSamRecord rec; try { - rec = (GATKSAMRecord)oldRec.clone(); + rec = (GATKSamRecord)oldRec.clone(); } catch (Exception e) { return null; } @@ -598,7 +598,7 @@ public class ReadUtils { * @param rec original SAM record * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped */ - public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec) { + public static GATKSamRecord hardClipAdaptorSequence(final SAMRecord rec) { return hardClipAdaptorSequence(rec, DEFAULT_ADAPTOR_SIZE); } From 1b38aa1a7e67deeb4f426b16cf7eaa8d6016e938 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Oct 2011 15:46:44 -0400 Subject: [PATCH 03/34] Cleaning up reduced read code accessors --- .../sting/utils/sam/ReadUtils.java | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index f91d77244..9fa8fc544 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -55,6 +55,7 @@ public class ReadUtils { public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC"; public final static byte[] getReducedReadQualityTagValue(final SAMRecord read) { + // TODO -- warning of performance problem. Should be cached in GATKSamRecord return read.getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); } @@ -70,21 +71,6 @@ public class ReadUtils { return getReducedReadQualityTagValue(read)[i]; } - public final static SAMRecord reducedReadWithReducedQuals(final SAMRecord read) { - if ( ! isReducedRead(read) ) throw new IllegalArgumentException("read must be a reduced read"); - return read; -// try { -// SAMRecord newRead = (SAMRecord)read.clone(); -// byte reducedQual = (byte)(int)getReducedReadQualityTagValue(read); -// byte[] newQuals = new byte[read.getBaseQualities().length]; -// Arrays.fill(newQuals, reducedQual); -// newRead.setBaseQualities(newQuals); -// return newRead; -// } catch ( CloneNotSupportedException e ) { -// throw new ReviewedStingException("SAMRecord no longer supports clone", e); -// } - } - // ---------------------------------------------------------------------------------------------------- // // General utilities From 52345f0aec828aca04af8e84259d68ad21ca69c9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Oct 2011 15:47:36 -0400 Subject: [PATCH 04/34] Meaningful documentation string --- .../sting/utils/sam/GATKSamRecordFactory.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java index 9076f06ae..adde2e293 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSamRecordFactory.java @@ -31,10 +31,10 @@ import net.sf.samtools.BAMRecord; import org.broadinstitute.sting.utils.exceptions.UserException; /** - * Factory interface which allows plugging in of different classes for generating instances of - * SAMRecord and BAMRecord when reading from SAM/BAM files. + * Factory interface implementation used to create GATKSamRecords + * from SAMFileReaders with SAM-JDK * - * @author Tim Fennell + * @author Mark DePristo */ public class GATKSamRecordFactory implements SAMRecordFactory { From bba69701b505dec882ff8eee978be0451ec5b335 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Oct 2011 17:49:17 -0400 Subject: [PATCH 05/34] Now creates GATKSamRecords now SamRecords --- .../org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 2dcdd5ce6..b6b671360 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -146,7 +146,7 @@ public class ArtificialSAMUtils { if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) || (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart); - SAMRecord record = new SAMRecord(header); + SAMRecord record = new GATKSamRecord(header); record.setReadName(name); record.setReferenceIndex(refIndex); record.setAlignmentStart(alignmentStart); From 3227143a1c930db8a243f1be264840a7e76ea7fc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Oct 2011 17:50:27 -0400 Subject: [PATCH 06/34] Systematic test code for FragmentPileup -- Creates all combinatinos of overlapping and non-overlapping read pair pileups in all orientations and first/second pairings to validate fragment detection. --- .../utils/pileup/FragmentPileupUnitTest.java | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java new file mode 100644 index 000000000..778259528 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.pileup; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Test routines for read-backed pileup. + */ +public class FragmentPileupUnitTest extends BaseTest { + private static SAMFileHeader header; + + private class FragmentPileupTest extends TestDataProvider { + List states = new ArrayList(); + + private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { + super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); + + for ( int pos = leftStart; pos < rightStart + readLen; pos++) { + SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, "readpair", 0, leftStart, readLen); + SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, "readpair", 0, rightStart, readLen); + + left.setProperPairFlag(true); + right.setProperPairFlag(true); + + left.setFirstOfPairFlag(leftIsFirst); + right.setFirstOfPairFlag(! leftIsFirst); + + left.setReadNegativeStrandFlag(leftIsNegative); + left.setMateNegativeStrandFlag(!leftIsNegative); + right.setReadNegativeStrandFlag(!leftIsNegative); + right.setMateNegativeStrandFlag(leftIsNegative); + + left.setMateAlignmentStart(right.getAlignmentStart()); + right.setMateAlignmentStart(left.getAlignmentStart()); + + left.setMateReferenceIndex(0); + right.setMateReferenceIndex(0); + + int isize = rightStart + readLen - leftStart; + left.setInferredInsertSize(isize); + right.setInferredInsertSize(-isize); + + boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd(); + boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd(); + + if ( posCoveredByLeft || posCoveredByRight ) { + List reads = new ArrayList(); + List offsets = new ArrayList(); + + if ( posCoveredByLeft ) { + reads.add(left); + offsets.add(pos - left.getAlignmentStart()); + } + + if ( posCoveredByRight ) { + reads.add(right); + offsets.add(pos - right.getAlignmentStart()); + } + + boolean shouldBeFragment = posCoveredByLeft && posCoveredByRight; + ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets); + TestState testState = new TestState(shouldBeFragment, pileup); + states.add(testState); + } + } + } + } + + private class TestState { + boolean shouldBeFragment; + ReadBackedPileup pileup; + + private TestState(final boolean shouldBeFragment, final ReadBackedPileup pileup) { + this.shouldBeFragment = shouldBeFragment; + this.pileup = pileup; + } + } + + @DataProvider(name = "fragmentPileupTest") + public Object[][] createTests() { + for ( boolean leftIsFirst : Arrays.asList(true, false) ) { + for ( boolean leftIsNegative : Arrays.asList(true, false) ) { + // Overlapping pair + // ----> [first] + // <--- [second] + new FragmentPileupTest("overlapping-pair", 10, 1, 5, leftIsFirst, leftIsNegative); + + // Non-overlapping pair + // ----> + // <---- + new FragmentPileupTest("nonoverlapping-pair", 10, 1, 15, leftIsFirst, leftIsNegative); + } + } + + return FragmentPileupTest.getTests(FragmentPileupTest.class); + } + + @Test(enabled = true, dataProvider = "fragmentPileupTest") + public void testMe(FragmentPileupTest test) { + for ( TestState testState : test.states ) { + ReadBackedPileup rbp = testState.pileup; + FragmentPileup fp = new FragmentPileup(rbp); + Assert.assertEquals(fp.getTwoReadPileup().size(), testState.shouldBeFragment ? 1 : 0); + Assert.assertEquals(fp.getOneReadPileup().size(), testState.shouldBeFragment ? 0 : 1); + } + } + + + @BeforeTest + public void setup() { + header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000); + } +} From 999a8998ae4d007847f9c2fdd99b4ef3217cc9fe Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 19 Oct 2011 17:51:48 -0400 Subject: [PATCH 07/34] Constructor for GATKSamRecord with header only, for unit testing --- .../sting/utils/sam/GATKSAMRecord.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 56e6e74be..7956ac388 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -36,6 +36,16 @@ public class GATKSamRecord extends BAMRecord { // These attributes exist in memory only, and are never written to disk. private Map temporaryAttributes; + /** + * HACK TO CREATE GATKSAMRECORD WITH ONLY A HEADER FOR TESTING PURPOSES ONLY + * @param header + */ + public GATKSamRecord(final SAMFileHeader header) { + super(header, SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START, + (short)0, (short)255, 0, 1, 0, 1, 0, 0, 0, null); + } + + public GATKSamRecord(final SAMFileHeader header, final int referenceSequenceIndex, final int alignmentStart, @@ -57,17 +67,20 @@ public class GATKSamRecord extends BAMRecord { // *** The following methods are overloaded to cache the appropriate data ***// /////////////////////////////////////////////////////////////////////////////// + @Override public String getReadString() { if ( mReadString == null ) mReadString = super.getReadString(); return mReadString; } + @Override public void setReadString(String s) { super.setReadString(s); mReadString = s; } + @Override public SAMReadGroupRecord getReadGroup() { if ( !retrievedReadGroup ) { SAMReadGroupRecord tempReadGroup = super.getReadGroup(); From 94e1898d8f48185593d5717395888ce08abe13f7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 21 Oct 2011 09:37:45 -0400 Subject: [PATCH 08/34] A canonical set of NGS platforms as enums with convenient manipulation methods --- .../sting/utils/NGSPlatform.java | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java diff --git a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java new file mode 100644 index 000000000..3ab9e1655 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils; + +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; + +/** + * A canonical, master list of the standard NGS platforms. These values + * can be obtained (efficiently) from a GATKSamRecord object with the + * getNGSPlatform method. + * + * @author Mark DePristo + * @since 2011 + */ +public enum NGSPlatform { + ILLUMINA("ILLUMINA", "SLX", "SOLEXA"), + SOLID("SOLID"), + LS454("454"), + COMPLETE_GENOMICS("COMPLETE"), + PACBIO("PACBIO"), + ION_TORRENT("IONTORRENT"), + UNKNOWN("UNKNOWN"); + + /** + * Array of the prefix names in a BAM file for each of the platforms. + */ + private final String[] BAM_PL_NAMES; + + NGSPlatform(final String... BAM_PL_NAMES) { + for ( int i = 0; i < BAM_PL_NAMES.length; i++ ) + BAM_PL_NAMES[i] = BAM_PL_NAMES[i].toUpperCase(); + this.BAM_PL_NAMES = BAM_PL_NAMES; + } + + /** + * Returns a representative PL string for this platform + * @return + */ + public final String getDefaultPlatform() { + return BAM_PL_NAMES[0]; + } + + /** + * Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord. + * Note you should not use this function if you have a GATKSamRecord -- use the + * accessor method instead. + * + * @param read + * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match + */ + public static final NGSPlatform fromRead(SAMRecord read) { + return fromReadGroup(read.getReadGroup()); + } + + /** + * Returns the NGSPlatform corresponding to the PL tag in the read group + * @param rg + * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match + */ + public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) { + return fromReadGroupPL(rg.getPlatform()); + } + + /** + * Returns the NGSPlatform corresponding to the PL tag in the read group + * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object + * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match + */ + public static final NGSPlatform fromReadGroupPL(final String plFromRG) { + if ( plFromRG == null ) return UNKNOWN; + + // todo -- algorithm could be implemented more efficiently, as the list of all + // todo -- names is known upfront, so a decision tree could be used to identify + // todo -- a prefix common to PL + final String pl = plFromRG.toUpperCase(); + for ( final NGSPlatform ngsPlatform : NGSPlatform.values() ) { + for ( final String bamPLName : ngsPlatform.BAM_PL_NAMES ) { + if ( pl.contains(bamPLName) ) + return ngsPlatform; + } + } + + return UNKNOWN; + } +} From ed74ebcfa11ad90c85694dd6666c5d96af807393 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 21 Oct 2011 09:38:41 -0400 Subject: [PATCH 09/34] GATKSamRecords with efficiency NGSPlatform method --- .../utils/sam/GATKSAMReadGroupRecord.java | 23 +++++++++++++++++++ .../sting/utils/sam/GATKSAMRecord.java | 16 +++++++++---- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index c7ffcab0c..ff7d12f09 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.utils.NGSPlatform; /** * @author ebanks @@ -15,16 +16,28 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { // the SAMReadGroupRecord data we're caching private String mSample = null; private String mPlatform = null; + private NGSPlatform mNGSPlatform = null; // because some values can be null, we don't want to duplicate effort private boolean retrievedSample = false; private boolean retrievedPlatform = false; + private boolean retrievedNGSPlatform = false; + public GATKSAMReadGroupRecord(final String id) { + super(id); + } public GATKSAMReadGroupRecord(SAMReadGroupRecord record) { super(record.getReadGroupId(), record); } + public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) { + super(record.getReadGroupId(), record); + setPlatform(pl.getDefaultPlatform()); + mNGSPlatform = pl; + retrievedPlatform = retrievedNGSPlatform = true; + } + /////////////////////////////////////////////////////////////////////////////// // *** The following methods are overloaded to cache the appropriate data ***// /////////////////////////////////////////////////////////////////////////////// @@ -55,5 +68,15 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { super.setPlatform(s); mPlatform = s; retrievedPlatform = true; + retrievedNGSPlatform = false; // recalculate the NGSPlatform + } + + public NGSPlatform getNGSPlatform() { + if ( ! retrievedNGSPlatform ) { + mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform()); + retrievedNGSPlatform = true; + } + + return mNGSPlatform; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 7956ac388..037545a78 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; +import org.broadinstitute.sting.utils.NGSPlatform; import java.util.HashMap; import java.util.Map; @@ -25,7 +26,7 @@ import java.util.Map; public class GATKSamRecord extends BAMRecord { // the SAMRecord data we're caching private String mReadString = null; - private SAMReadGroupRecord mReadGroup = null; + private GATKSAMReadGroupRecord mReadGroup = null; // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; @@ -81,17 +82,22 @@ public class GATKSamRecord extends BAMRecord { } @Override - public SAMReadGroupRecord getReadGroup() { + public GATKSAMReadGroupRecord getReadGroup() { if ( !retrievedReadGroup ) { SAMReadGroupRecord tempReadGroup = super.getReadGroup(); - mReadGroup = (tempReadGroup == null ? tempReadGroup : new GATKSAMReadGroupRecord(tempReadGroup)); + mReadGroup = (tempReadGroup == null ? null : new GATKSAMReadGroupRecord(tempReadGroup)); retrievedReadGroup = true; } return mReadGroup; } - public void setReadGroup(SAMReadGroupRecord record) { - mReadGroup = record; + public NGSPlatform getNGSPlatform() { + return getReadGroup().getNGSPlatform(); + } + + public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) { + mReadGroup = readGroup; + retrievedReadGroup = true; } /** From be797a8a1f4eb72fe5843864decc45d2e7d852e8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 21 Oct 2011 09:39:21 -0400 Subject: [PATCH 10/34] Recalibrator now uses the much more efficient NGSPlatform in the cycle covariates system --- ivy.xml | 2 +- .../walkers/recalibration/CycleCovariate.java | 130 ++---------------- .../recalibration/RecalDataManager.java | 8 +- 3 files changed, 14 insertions(+), 126 deletions(-) diff --git a/ivy.xml b/ivy.xml index 96c1de844..0737b676d 100644 --- a/ivy.xml +++ b/ivy.xml @@ -10,7 +10,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index e117454f9..857f74155 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -2,9 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSamRecord; import java.util.Arrays; +import java.util.EnumSet; import java.util.List; /* @@ -46,6 +49,9 @@ import java.util.List; */ public class CycleCovariate implements StandardCovariate { + private final static EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS); + private final static EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); + // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { if( RAC.DEFAULT_PLATFORM != null ) { @@ -58,122 +64,6 @@ public class CycleCovariate implements StandardCovariate { } } - /* - // Used to pick out the covariate's value from attributes of the read - public final Comparable getValue( final SAMRecord read, final int offset ) { - - int cycle = 1; - - //----------------------------- - // ILLUMINA and SOLID - //----------------------------- - - if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX" - read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID" - cycle = offset + 1; - if( read.getReadNegativeStrandFlag() ) { - cycle = read.getReadLength() - offset; - } - } - - //----------------------------- - // 454 - //----------------------------- - - else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454" - final byte[] bases = read.getReadBases(); - - // BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change - // For example, AAAAAAA was probably read in two flow cycles but here we count it as one - if( !read.getReadNegativeStrandFlag() ) { // Forward direction - int iii = 0; - while( iii <= offset ) - { - while( iii <= offset && bases[iii] == (byte)'T' ) { iii++; } - while( iii <= offset && bases[iii] == (byte)'A' ) { iii++; } - while( iii <= offset && bases[iii] == (byte)'C' ) { iii++; } - while( iii <= offset && bases[iii] == (byte)'G' ) { iii++; } - if( iii <= offset ) { cycle++; } - if( iii <= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii++; } - - } - } else { // Negative direction - int iii = bases.length-1; - while( iii >= offset ) - { - while( iii >= offset && bases[iii] == (byte)'T' ) { iii--; } - while( iii >= offset && bases[iii] == (byte)'A' ) { iii--; } - while( iii >= offset && bases[iii] == (byte)'C' ) { iii--; } - while( iii >= offset && bases[iii] == (byte)'G' ) { iii--; } - if( iii >= offset ) { cycle++; } - if( iii >= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii--; } - } - } - } - - //----------------------------- - // SOLID (unused), only to be used in conjunction with PrimerRoundCovariate - //----------------------------- - - //else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) { - // // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf - // int pos = offset + 1; - // if( read.getReadNegativeStrandFlag() ) { - // pos = read.getReadLength() - offset; - // } - // cycle = pos / 5; // integer division - //} - - //----------------------------- - // UNRECOGNIZED PLATFORM - //----------------------------- - - else { // Platform is unrecognized so revert to the default platform but warn the user first - if( defaultPlatform != null) { // The user set a default platform - if( !warnedUserBadPlatform ) { - Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + - "Defaulting to platform = " + defaultPlatform + "." ); - } - warnedUserBadPlatform = true; - - read.getReadGroup().setPlatform( defaultPlatform ); - return getValue( read, offset ); // A recursive call - } else { // The user did not set a default platform - throw new StingException( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + - "No default platform specified. Users must set the default platform using the --default_platform argument." ); - } - } - - // Differentiate between first and second of pair. - // The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group - // to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair. - // Therefore the cycle covariate must differentiate between first and second of pair reads. - // This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because - // the current sequential model would consider the effects independently instead of jointly. - if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) { - cycle *= -1; - } - - return cycle; - } - */ - - // todo -- this should be put into a common place in the code base - private static List ILLUMINA_NAMES = Arrays.asList("ILLUMINA", "SLX", "SOLEXA"); - private static List SOLID_NAMES = Arrays.asList("SOLID"); - private static List LS454_NAMES = Arrays.asList("454"); - private static List COMPLETE_GENOMICS_NAMES = Arrays.asList("COMPLETE"); - private static List PACBIO_NAMES = Arrays.asList("PACBIO"); - private static List ION_TORRENT_NAMES = Arrays.asList("IONTORRENT"); - - private static boolean isPlatform(SAMRecord read, List names) { - String pl = read.getReadGroup().getPlatform().toUpperCase(); - for ( String name : names ) - if ( pl.contains( name ) ) - return true; - return false; - } - // Used to pick out the covariate's value from attributes of the read public void getValues(SAMRecord read, Comparable[] comparable) { @@ -181,7 +71,8 @@ public class CycleCovariate implements StandardCovariate { // Illumina, Solid, PacBio, and Complete Genomics //----------------------------- - if( isPlatform(read, ILLUMINA_NAMES) || isPlatform(read, SOLID_NAMES) || isPlatform(read, PACBIO_NAMES) || isPlatform(read, COMPLETE_GENOMICS_NAMES) ) { + final NGSPlatform ngsPlatform = ((GATKSamRecord)read).getNGSPlatform(); + if( DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform) ) { final int init; final int increment; if( !read.getReadNegativeStrandFlag() ) { @@ -227,8 +118,7 @@ public class CycleCovariate implements StandardCovariate { //----------------------------- // 454 and Ion Torrent //----------------------------- - - else if ( isPlatform(read, LS454_NAMES) || isPlatform(read, ION_TORRENT_NAMES)) { // Some bams have "LS454" and others have just "454" + else if( FLOW_CYCLE_PLATFORMS.contains(ngsPlatform) ) { final int readLength = read.getReadLength(); final byte[] bases = read.getReadBases(); @@ -273,8 +163,6 @@ public class CycleCovariate implements StandardCovariate { else { throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform()); } - - } // Used to get the covariate's value from input csv file in TableRecalibrationWalker diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index bd949aa81..a8a829801 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSamRecord; import java.util.ArrayList; @@ -228,8 +229,7 @@ public class RecalDataManager { * @param RAC The list of shared command line arguments */ public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) { - - SAMReadGroupRecord readGroup = read.getReadGroup(); + GATKSAMReadGroupRecord readGroup = ((GATKSamRecord)read).getReadGroup(); // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments if( readGroup == null ) { @@ -241,7 +241,7 @@ public class RecalDataManager { warnUserNullReadGroup = true; } // There is no readGroup so defaulting to these values - readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); + readGroup = new GATKSAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); ((GATKSamRecord)read).setReadGroup( readGroup ); } else { @@ -251,7 +251,7 @@ public class RecalDataManager { if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user final String oldPlatform = readGroup.getPlatform(); - readGroup = new SAMReadGroupRecord( RAC.FORCE_READ_GROUP ); + readGroup = new GATKSAMReadGroupRecord( RAC.FORCE_READ_GROUP ); readGroup.setPlatform( oldPlatform ); ((GATKSamRecord)read).setReadGroup( readGroup ); } From 2403e9606201070f6c38039d6b5d2fcff1ea5e7c Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 21 Oct 2011 09:59:24 -0400 Subject: [PATCH 11/34] Renamed GATKSamRecord -> GATKSAMRecord for consistency. Better docs. --- ...elGenotypeLikelihoodsCalculationModel.java | 4 +- .../recalibration/CountCovariatesWalker.java | 6 +-- .../walkers/recalibration/CycleCovariate.java | 4 +- .../recalibration/RecalDataManager.java | 10 ++-- .../TableRecalibrationWalker.java | 4 +- .../sting/utils/NGSPlatform.java | 4 +- .../sting/utils/sam/ArtificialSAMUtils.java | 2 +- .../sting/utils/sam/GATKSAMRecord.java | 53 ++++++++++++++----- .../sting/utils/sam/GATKSamRecordFactory.java | 2 +- .../sting/utils/sam/ReadUtils.java | 20 +++---- 10 files changed, 67 insertions(+), 42 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 1eafcfb10..aea63b61d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.sam.GATKSamRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -125,7 +125,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) { //SAMRecord read = p.getRead(); - GATKSamRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); + GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); if (read == null) continue; if(ReadUtils.is454Read(read)) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 424d4cdd6..1bdb70bdd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.sam.GATKSamRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; import java.util.ArrayList; @@ -352,7 +352,7 @@ public class CountCovariatesWalker extends LocusWalker adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength); - GATKSamRecord result = (GATKSamRecord)rec; + GATKSAMRecord result = (GATKSAMRecord)rec; if ( adaptorBoundaries != null ) { if ( rec.getReadNegativeStrandFlag() && adaptorBoundaries.second >= rec.getAlignmentStart() && adaptorBoundaries.first < rec.getAlignmentEnd() ) @@ -276,7 +276,7 @@ public class ReadUtils { } // return true if the read needs to be completely clipped - private static GATKSamRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { + private static GATKSAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { if ( stopPosition >= oldRec.getAlignmentEnd() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it @@ -284,9 +284,9 @@ public class ReadUtils { return null; } - GATKSamRecord rec; + GATKSAMRecord rec; try { - rec = (GATKSamRecord)oldRec.clone(); + rec = (GATKSAMRecord)oldRec.clone(); } catch (Exception e) { return null; } @@ -356,7 +356,7 @@ public class ReadUtils { return rec; } - private static GATKSamRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { + private static GATKSAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { if ( startPosition <= oldRec.getAlignmentStart() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it @@ -364,9 +364,9 @@ public class ReadUtils { return null; } - GATKSamRecord rec; + GATKSAMRecord rec; try { - rec = (GATKSamRecord)oldRec.clone(); + rec = (GATKSAMRecord)oldRec.clone(); } catch (Exception e) { return null; } @@ -584,7 +584,7 @@ public class ReadUtils { * @param rec original SAM record * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped */ - public static GATKSamRecord hardClipAdaptorSequence(final SAMRecord rec) { + public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec) { return hardClipAdaptorSequence(rec, DEFAULT_ADAPTOR_SIZE); } From b863390cb1a4c89d8a77cdc6a20c76aa5f810ac8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 21 Oct 2011 13:28:05 -0400 Subject: [PATCH 13/34] Moving reduced read functionality into GATKSAMRecord -- More functions take / produce GATKSAMRecords instead of SAMRecord --- .../indels/PairHMMIndelErrorModel.java | 2 +- .../sting/utils/pileup/PileupElement.java | 7 +-- .../sting/utils/sam/ArtificialSAMUtils.java | 11 +++-- .../sting/utils/sam/GATKSAMRecord.java | 48 ++++++++++++++++++- .../sting/utils/sam/ReadUtils.java | 18 ------- .../sting/utils/ReadUtilsUnitTest.java | 12 ++--- .../utils/ReservoirDownsamplerUnitTest.java | 5 +- 7 files changed, 67 insertions(+), 36 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 9edf5b5d4..3a824cdfe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -393,7 +393,7 @@ public class PairHMMIndelErrorModel { for (PileupElement p: pileup) { // > 1 when the read is a consensus read representing multiple independent observations - final boolean isReduced = ReadUtils.isReducedRead(p.getRead()); + final boolean isReduced = p.isReducedRead(); readCounts[readIdx] = isReduced ? p.getReducedCount() : 1; // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index f6ed792a5..e47b6ada7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -4,6 +4,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; /** @@ -82,16 +83,16 @@ public class PileupElement { // -------------------------------------------------------------------------- public boolean isReducedRead() { - return ReadUtils.isReducedRead(getRead()); + return ((GATKSAMRecord)read).isReducedRead(); } public int getReducedCount() { if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName()); - return ReadUtils.getReducedCount(getRead(), offset); + return ((GATKSAMRecord)read).getReducedCount(offset); } public byte getReducedQual() { if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName()); - return ReadUtils.getReducedQual(getRead(), offset); + return getQual(); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 85011f2e0..4eba32383 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -134,6 +134,7 @@ public class ArtificialSAMUtils { /** * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * + * * @param header the SAM header to associate the read with * @param name the name of the read * @param refIndex the reference index, i.e. what chromosome to associate it with @@ -142,11 +143,11 @@ public class ArtificialSAMUtils { * * @return the artificial read */ - public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) { + public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) { if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) || (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart); - SAMRecord record = new GATKSAMRecord(header); + GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(name); record.setReferenceIndex(refIndex); record.setAlignmentStart(alignmentStart); @@ -166,6 +167,7 @@ public class ArtificialSAMUtils { if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { record.setReadUnmappedFlag(true); } + return record; } @@ -181,16 +183,17 @@ public class ArtificialSAMUtils { * * @return the artificial read */ - public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) { + public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) { if (bases.length != qual.length) { throw new ReviewedStingException("Passed in read string is different length then the quality array"); } - SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); + GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); rec.setReadBases(bases); rec.setBaseQualities(qual); if (refIndex == -1) { rec.setReadUnmappedFlag(true); } + return rec; } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index c363f2072..d50f2ff03 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -45,9 +45,11 @@ public class GATKSAMRecord extends BAMRecord { // the SAMRecord data we're caching private String mReadString = null; private GATKSAMReadGroupRecord mReadGroup = null; + private byte[] reducedReadCounts = null; // because some values can be null, we don't want to duplicate effort private boolean retrievedReadGroup = false; + private boolean retrievedReduceReadCounts = false; // These temporary attributes were added here to make life easier for // certain algorithms by providing a way to label or attach arbitrary data to @@ -60,8 +62,27 @@ public class GATKSAMRecord extends BAMRecord { * @param header */ public GATKSAMRecord(final SAMFileHeader header) { - super(header, SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START, - (short)0, (short)255, 0, 1, 0, 1, 0, 0, 0, null); + this(new SAMRecord(header)); + } + + /** + * HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY + * @param read + */ + public GATKSAMRecord(final SAMRecord read) { + super(read.getHeader(), read.getMateReferenceIndex(), + read.getAlignmentStart(), + read.getReadName() != null ? (short)read.getReadNameLength() : 0, + (short)read.getMappingQuality(), + 0, + read.getCigarLength(), + read.getFlags(), + read.getReadLength(), + read.getMateReferenceIndex(), + read.getMateAlignmentStart(), + read.getInferredInsertSize(), + new byte[]{}); + super.clearAttributes(); } public GATKSAMRecord(final SAMFileHeader header, @@ -121,6 +142,29 @@ public class GATKSAMRecord extends BAMRecord { retrievedReadGroup = true; } + // + // + // Reduced read functions + // + // + + public byte[] getReducedReadCounts() { + if ( ! retrievedReduceReadCounts ) { + reducedReadCounts = getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); + retrievedReduceReadCounts = true; + } + + return reducedReadCounts; + } + + public boolean isReducedRead() { + return getReducedReadCounts() != null; + } + + public final byte getReducedCount(final int i) { + return getReducedReadCounts()[i]; + } + /** * Checks whether an attribute has been set for the given key. * diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 16abbe498..f8e4927ed 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -52,24 +52,6 @@ public class ReadUtils { // ---------------------------------------------------------------------------------------------------- public static final String REDUCED_READ_QUALITY_TAG = "RQ"; - public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC"; - - public final static byte[] getReducedReadQualityTagValue(final SAMRecord read) { - // TODO -- warning of performance problem. Should be cached in GATKSAMRecord - return read.getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); - } - - public final static boolean isReducedRead(final SAMRecord read) { - return getReducedReadQualityTagValue(read) != null; - } - - public final static byte getReducedQual(final SAMRecord read, final int i) { - return read.getBaseQualities()[i]; - } - - public final static byte getReducedCount(final SAMRecord read, final int i) { - return getReducedReadQualityTagValue(read)[i]; - } // ---------------------------------------------------------------------------------------------------- // diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java index cc0007439..59a6ecb8d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java @@ -5,6 +5,7 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.testng.Assert; import org.testng.annotations.BeforeTest; @@ -12,7 +13,7 @@ import org.testng.annotations.Test; public class ReadUtilsUnitTest extends BaseTest { - SAMRecord read, reducedRead; + GATKSAMRecord read, reducedRead; final static String BASES = "ACTG"; final static String QUALS = "!+5?"; final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40}; @@ -47,13 +48,12 @@ public class ReadUtilsUnitTest extends BaseTest { @Test public void testReducedReads() { - Assert.assertFalse(ReadUtils.isReducedRead(read), "isReducedRead is false for normal read"); - Assert.assertEquals(ReadUtils.getReducedReadQualityTagValue(read), null, "No reduced read tag in normal read"); + Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); + Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); - Assert.assertTrue(ReadUtils.isReducedRead(reducedRead), "isReducedRead is true for reduced read"); + Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); for ( int i = 0; i < reducedRead.getReadLength(); i++) { - Assert.assertEquals(ReadUtils.getReducedQual(reducedRead, i), read.getBaseQualities()[i], "Reduced read quality not set to the expected value at " + i); - Assert.assertEquals(ReadUtils.getReducedCount(reducedRead, i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); + Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java index 76dd5d341..0f19e2f90 100644 --- a/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReservoirDownsamplerUnitTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.utils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.Test; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; @@ -28,7 +29,7 @@ public class ReservoirDownsamplerUnitTest { @Test public void testOneElementWithPoolSizeOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); + List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); ReservoirDownsampler downsampler = new ReservoirDownsampler(1); downsampler.addAll(reads); @@ -40,7 +41,7 @@ public class ReservoirDownsamplerUnitTest { @Test public void testOneElementWithPoolSizeGreaterThanOne() { - List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); + List reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); ReservoirDownsampler downsampler = new ReservoirDownsampler(5); downsampler.addAll(reads); From 42bf9adede112282e8e8e45bf987ab01305b0a7e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 22 Oct 2011 21:36:37 -0400 Subject: [PATCH 14/34] Initial version of "fast" FragmentPileup code -- Uses mayOverlapRoutine in ReadUtils -- Attempts to be smart when doing overlap calculation, to avoid unnecessary allocations -- PileupElement now comparable (sorts on offset than on start) -- Caliper microbenchmark to assess performance --- .../sting/utils/pileup/FragmentPileup.java | 124 ++++++++++++++++-- .../sting/utils/pileup/PileupElement.java | 16 ++- .../sting/utils/sam/ReadUtils.java | 48 +++++++ .../UnifiedGenotyperIntegrationTest.java | 12 +- .../utils/pileup/FragmentPileupBenchmark.java | 108 +++++++++++++++ .../utils/pileup/FragmentPileupUnitTest.java | 60 +++++---- 6 files changed, 323 insertions(+), 45 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index f7d237401..f9a94989b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.utils.pileup; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; + +import java.util.*; /** * An easy to access fragment-based pileup, which contains two separate pileups. The first @@ -16,28 +17,46 @@ import java.util.Map; * TODO -- technically we could generalize this code to support a pseudo-duplicate marking * TODO -- algorithm that could collect all duplicates into a single super pileup element * + * Oct 21: note that the order of the oneReadPileup and twoReadPileups are not + * defined. The algorithms that produce these lists are in fact producing + * lists of Pileup elements *NOT* sorted by alignment start position of the underlying + * reads. + * * User: depristo * Date: 3/26/11 * Time: 10:09 PM */ public class FragmentPileup { - final Collection oneReadPile; - final Collection twoReadPile = new ArrayList(); + Collection oneReadPile = null; + Collection twoReadPile = null; /** * Create a new Fragment-based pileup from the standard read-based pileup * @param pileup */ public FragmentPileup(ReadBackedPileup pileup) { - Map nameMap = new HashMap(); + //oldSlowCalculation(pileup); + fastNewCalculation(pileup); + } + + protected FragmentPileup(ReadBackedPileup pileup, boolean useOldAlgorithm) { + if ( useOldAlgorithm ) + oldSlowCalculation(pileup); + else + fastNewCalculation(pileup); + } + + private final void oldSlowCalculation(final ReadBackedPileup pileup) { + final Map nameMap = new HashMap(); // build an initial map, grabbing all of the multi-read fragments - for ( PileupElement p : pileup ) { - String readName = p.getRead().getReadName(); + for ( final PileupElement p : pileup ) { + final String readName = p.getRead().getReadName(); - PileupElement pe1 = nameMap.get(readName); + final PileupElement pe1 = nameMap.get(readName); if ( pe1 != null ) { // assumes we have at most 2 reads per fragment + if ( twoReadPile == null ) twoReadPile = new ArrayList(); twoReadPile.add(new TwoReadPileupElement(pe1, p)); nameMap.remove(readName); } else { @@ -45,17 +64,96 @@ public class FragmentPileup { } } - // now set the one Read pile to the values in the nameMap with only a single read oneReadPile = nameMap.values(); } + /** + * @param pileup + */ + private final void fastNewCalculation(final ReadBackedPileup pileup) { + Map nameMap = null; // lazy initialization + + for ( final PileupElement p : pileup ) { + final SAMRecord read = p.getRead(); + + switch (ReadUtils.readMightOverlapMate(read) ) { + // we know for certain this read doesn't have an overlapping mate + case NO: { + addToOnePile(p); + break; + } + + // we know that we overlap our mate, so put the read in the nameMap in + // case our mate shows up + case LEFT_YES: { + nameMap = addToNameMap(nameMap, p); + break; + } + + // read starts at the same position, so we are looking at either the first or + // the second read. In the first, add it to the map, in the second grab it + // from the map and create a fragment + case SAME_START: { + final PileupElement pe1 = getFromNameMap(nameMap, p); + if ( pe1 != null ) { + addToTwoPile(pe1, p); + nameMap.remove(p.getRead().getReadName()); + } else { + nameMap = addToNameMap(nameMap, p); + } + break; + } + + + // in this case we need to see if our mate is already present, and if so + // grab the read from the list + case RIGHT_MAYBE: { + final PileupElement pe1 = getFromNameMap(nameMap, p); + if ( pe1 != null ) { + addToTwoPile(pe1, p); + nameMap.remove(p.getRead().getReadName()); + } else { + addToOnePile(p); + } + break; + } + } + } + + if ( nameMap != null && ! nameMap.isEmpty() ) // could be slightly more optimally + for ( final PileupElement p : nameMap.values() ) + addToOnePile(p); + } + + private final Map addToNameMap(Map map, final PileupElement p) { + if ( map == null ) map = new HashMap(); + map.put(p.getRead().getReadName(), p); + return map; + } + + private final PileupElement getFromNameMap(Map map, final PileupElement p) { + return map == null ? null : map.get(p.getRead().getReadName()); + } + + + private final void addToOnePile(final PileupElement p) { + if ( oneReadPile == null ) oneReadPile = new ArrayList(); + oneReadPile.add(p); + } + + private final void addToTwoPile(final PileupElement p1, final PileupElement p2) { + // assumes we have at most 2 reads per fragment + if ( twoReadPile == null ) twoReadPile = new ArrayList(); + twoReadPile.add(new TwoReadPileupElement(p1, p2)); + } + /** * Gets the pileup elements containing two reads, in no particular order * * @return */ public Collection getTwoReadPileup() { - return twoReadPile; + return twoReadPile == null ? Collections.emptyList() : twoReadPile; } /** @@ -64,7 +162,7 @@ public class FragmentPileup { * @return */ public Collection getOneReadPileup() { - return oneReadPile; + return oneReadPile == null ? Collections.emptyList() : oneReadPile; } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index e47b6ada7..3d6b6f4b9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -13,7 +13,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; * Date: Apr 14, 2009 * Time: 8:54:05 AM */ -public class PileupElement { +public class PileupElement implements Comparable { public static final byte DELETION_BASE = BaseUtils.D; public static final byte DELETION_QUAL = (byte) 16; public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87; @@ -76,6 +76,20 @@ public class PileupElement { return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset]; } + @Override + public int compareTo(final PileupElement pileupElement) { + if ( offset < pileupElement.offset ) + return -1; + else if ( offset > pileupElement.offset ) + return 1; + else if ( read.getAlignmentStart() < pileupElement.read.getAlignmentStart() ) + return -1; + else if ( read.getAlignmentStart() > pileupElement.read.getAlignmentStart() ) + return 1; + else + return 0; + } + // -------------------------------------------------------------------------- // // Reduced read accessors diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index f8e4927ed..f093e6539 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -214,6 +214,54 @@ public class ReadUtils { return state; } + /** + * s1 e1 + * |-----------------------> [record in hand] + * s2 + * <-----------------------| + * + * s1, e1, and s2 are all in the record. Assuming that s1 < s2 (we are the left most read), + * we can compute whether we overlap with our mate by seeing if s2 <= e1 or no. If e1 < + * s2 then we known that we cannot over. + * + * If we are looking at the right read + * + * s1 + * |-----------------------> + * s2 e2 + * <-----------------------| [record in hand] + * + * we know the position of s1 and s2, but we don't know e1, so we cannot tell if we + * overlap with our mate or not, so in this case we return MAYBE. + * + * Note that if rec has an unmapped mate or is unpaired we certainly know the answer + * + * @param rec + * @return + */ + public static ReadOverlapsMateType readMightOverlapMate(final SAMRecord rec) { + if ( ! rec.getReadPairedFlag() || rec.getMateUnmappedFlag() ) { + return ReadOverlapsMateType.NO; + } else { // read is actually paired + final int recStart = rec.getAlignmentStart(); + final int recEnd = rec.getAlignmentEnd(); + final int mateStart = rec.getMateAlignmentStart(); + + if ( recStart < mateStart ) { + // we are the left most read + return mateStart <= recEnd ? ReadOverlapsMateType.LEFT_YES: ReadOverlapsMateType.NO; + } else if ( recStart == mateStart ) { + // we are the left most read + return ReadOverlapsMateType.SAME_START; + } else { + // we are the right most read, so we cannot tell + return ReadOverlapsMateType.RIGHT_MAYBE; + } + } + } + + public enum ReadOverlapsMateType { LEFT_YES, NO, SAME_START, RIGHT_MAYBE } + private static Pair getAdaptorBoundaries(SAMRecord rec, int adaptorLength) { int isize = rec.getInferredInsertSize(); if ( isize == 0 ) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 6b6346447..7d6cfc7ad 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -224,7 +224,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("0bece77ce6bc447438ef9b2921b2dc41")); + Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -252,7 +252,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("790b1a1d6ab79eee8c24812bb8ca6fae")); + Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -262,7 +262,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("408d3aba4d094c067fc00a43992c2292")); + Arrays.asList("118918f2e9e56a3cfc5ccb2856d529c8")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); } @@ -272,7 +272,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("5e4e09354410b76fc0d822050d84132a")); + Arrays.asList("a20799237accd52c1b8c2ac096309c8f")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } @@ -282,7 +282,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("c599eedbeb422713b8a28529e805e4ae")); + Arrays.asList("18ef8181157b4ac3eb8492f538467f92")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("37d908a682ac269f8f19dec939ff5b01")); + Arrays.asList("ad884e511a751b05e64db5314314365a")); executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); } diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java new file mode 100644 index 000000000..fe1ca305f --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.pileup; + +import com.google.caliper.Param; +import com.google.caliper.SimpleBenchmark; +import com.google.caliper.runner.CaliperMain; +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; + +import java.util.*; + +/** + * Caliper microbenchmark of fragment pileup + */ +public class FragmentPileupBenchmark extends SimpleBenchmark { + final int N_PILEUPS_TO_GENERATE = 100; + List pileups = new ArrayList(N_PILEUPS_TO_GENERATE); + + @Param({"10", "100", "1000"}) // , "10000"}) + int pileupSize; // set automatically by framework + + @Param({"150", "400"}) + int insertSize; // set automatically by framework + + @Override protected void setUp() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + GenomeLocParser genomeLocParser; + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + final int pos = 50; + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", pos); + + final Random ran = new Random(); + final int readLen = 100; + final boolean leftIsFirst = true; + final boolean leftIsNegative = false; + final int insertSizeVariation = insertSize / 10; + + for ( int pileupN = 0; pileupN < N_PILEUPS_TO_GENERATE; pileupN++ ) { + List pileupElements = new ArrayList(); + for ( int i = 0; i < pileupSize / 2; i++ ) { + final String readName = "read" + i; + final int leftStart = new Random().nextInt(49) + 1; + final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize); + final int rightStart = leftStart + fragmentSize - readLen; + + if ( rightStart <= 0 ) continue; + + List pair = FragmentPileupUnitTest.createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + SAMRecord left = pair.get(0); + SAMRecord right = pair.get(1); + + pileupElements.add(new PileupElement(left, pos - leftStart)); + + if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) { + pileupElements.add(new PileupElement(right, pos - rightStart)); + } + } + + Collections.sort(pileupElements); + pileups.add(new ReadBackedPileupImpl(loc, pileupElements)); + } + } + + public void timeNaiveNameMatch(int rep) { + int nFrags = 0; + for ( int i = 0; i < rep; i++ ) { + for ( ReadBackedPileup rbp : pileups ) + nFrags += new FragmentPileup(rbp, true).getTwoReadPileup().size(); + } + } + + public void timeFastNameMatch(int rep) { + int nFrags = 0; + for ( int i = 0; i < rep; i++ ) + for ( ReadBackedPileup rbp : pileups ) + nFrags += new FragmentPileup(rbp, false).getTwoReadPileup().size(); + } + + public static void main(String[] args) { + CaliperMain.main(FragmentPileupBenchmark.class, args); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java index 778259528..be4ecc0c6 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java @@ -43,37 +43,48 @@ import java.util.*; public class FragmentPileupUnitTest extends BaseTest { private static SAMFileHeader header; + public final static List createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { + SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen); + SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen); + + left.setReadPairedFlag(true); + right.setReadPairedFlag(true); + + left.setProperPairFlag(true); + right.setProperPairFlag(true); + + left.setFirstOfPairFlag(leftIsFirst); + right.setFirstOfPairFlag(! leftIsFirst); + + left.setReadNegativeStrandFlag(leftIsNegative); + left.setMateNegativeStrandFlag(!leftIsNegative); + right.setReadNegativeStrandFlag(!leftIsNegative); + right.setMateNegativeStrandFlag(leftIsNegative); + + left.setMateAlignmentStart(right.getAlignmentStart()); + right.setMateAlignmentStart(left.getAlignmentStart()); + + left.setMateReferenceIndex(0); + right.setMateReferenceIndex(0); + + int isize = rightStart + readLen - leftStart; + left.setInferredInsertSize(isize); + right.setInferredInsertSize(-isize); + + return Arrays.asList(left, right); + } + private class FragmentPileupTest extends TestDataProvider { List states = new ArrayList(); private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); + List pair = createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + SAMRecord left = pair.get(0); + SAMRecord right = pair.get(1); + for ( int pos = leftStart; pos < rightStart + readLen; pos++) { - SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, "readpair", 0, leftStart, readLen); - SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, "readpair", 0, rightStart, readLen); - - left.setProperPairFlag(true); - right.setProperPairFlag(true); - - left.setFirstOfPairFlag(leftIsFirst); - right.setFirstOfPairFlag(! leftIsFirst); - - left.setReadNegativeStrandFlag(leftIsNegative); - left.setMateNegativeStrandFlag(!leftIsNegative); - right.setReadNegativeStrandFlag(!leftIsNegative); - right.setMateNegativeStrandFlag(leftIsNegative); - - left.setMateAlignmentStart(right.getAlignmentStart()); - right.setMateAlignmentStart(left.getAlignmentStart()); - - left.setMateReferenceIndex(0); - right.setMateReferenceIndex(0); - - int isize = rightStart + readLen - leftStart; - left.setInferredInsertSize(isize); - right.setInferredInsertSize(-isize); - boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd(); boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd(); @@ -139,7 +150,6 @@ public class FragmentPileupUnitTest extends BaseTest { } } - @BeforeTest public void setup() { header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000); From f5d910b8a54b1dbceab506a7260ebaf1d9c4796b Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Sun, 23 Oct 2011 13:29:08 -0400 Subject: [PATCH 15/34] Haplotype caller now sends genotype likelihoods to the exact model to genotype the events found in the best haplotypes. --- .../AlleleFrequencyCalculationModel.java | 6 +- .../genotyper/ExactAFCalculationModel.java | 4 +- .../genotyper/GridSearchAFEstimation.java | 4 +- .../MultiallelicGenotypeLikelihoods.java | 7 +- .../genotyper/UnifiedGenotyperEngine.java | 79 ++++++++++++++++++- 5 files changed, 82 insertions(+), 18 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 1bb697056..35a9fe31d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -68,16 +68,12 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { /** * Must be overridden by concrete subclasses - * @param tracker rod data - * @param ref reference context * @param GLs genotype likelihoods * @param Alleles Alleles corresponding to GLs * @param log10AlleleFrequencyPriors priors * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results */ - protected abstract void getLog10PNonRef(RefMetaDataTracker tracker, - ReferenceContext ref, - Map GLs, List Alleles, + protected abstract void getLog10PNonRef(Map GLs, List Alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index f87eae781..1c2d82ab7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -51,9 +51,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { super(UAC, N, logger, verboseWriter); } - public void getLog10PNonRef(RefMetaDataTracker tracker, - ReferenceContext ref, - Map GLs, List alleles, + public void getLog10PNonRef(Map GLs, List alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java index f4195e5f0..27842a8bf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -52,9 +52,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { AFMatrix = new AlleleFrequencyMatrix(N); } - protected void getLog10PNonRef(RefMetaDataTracker tracker, - ReferenceContext ref, - Map GLs, List alleles, + protected void getLog10PNonRef(Map GLs, List alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { initializeAFMatrix(GLs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java index 3652763de..4f378b24a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -15,11 +16,11 @@ import java.util.ArrayList; public class MultiallelicGenotypeLikelihoods { private String sample; private double[] GLs; - private ArrayList alleleList; + private List alleleList; private int depth; public MultiallelicGenotypeLikelihoods(String sample, - ArrayList A, + List A, double[] log10Likelihoods, int depth) { /* Check for consistency between likelihood vector and number of alleles */ int numAlleles = A.size(); @@ -40,7 +41,7 @@ public class MultiallelicGenotypeLikelihoods { return GLs; } - public ArrayList getAlleles() { + public List getAlleles() { return alleleList; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index b72b68f9f..adb423145 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -325,7 +325,7 @@ public class UnifiedGenotyperEngine { // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); @@ -383,7 +383,7 @@ public class UnifiedGenotyperEngine { // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); @@ -391,7 +391,7 @@ public class UnifiedGenotyperEngine { // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); @@ -400,7 +400,7 @@ public class UnifiedGenotyperEngine { // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); @@ -447,6 +447,77 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } + public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) { + + // initialize the data for this thread if that hasn't been done yet + if ( afcm.get() == null ) { + log10AlleleFrequencyPosteriors.set(new double[N+1]); + afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); + } + + // estimate our confidence in a reference call and return + if ( vc.getNSamples() == 0 ) + return null; + + // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) + clearAFarray(log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + + // find the most likely frequency + int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); + + // calculate p(f>0) + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()); + double sum = 0.0; + for (int i = 1; i <= N; i++) + sum += normalizedPosteriors[i]; + double PofF = Math.min(sum, 1.0); // deal with precision errors + + double phredScaledConfidence; + if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); + if ( Double.isInfinite(phredScaledConfidence) ) + phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; + } else { + phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); + if ( Double.isInfinite(phredScaledConfidence) ) { + sum = 0.0; + for (int i = 1; i <= N; i++) { + if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + break; + sum += log10AlleleFrequencyPosteriors.get()[i]; + } + phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); + } + } + + // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero + if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { + // technically, at this point our confidence in a reference call isn't accurately estimated + // because it didn't take into account samples with no data, so let's get a better estimate + return null; + } + + // create the genotypes + Map genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); + + // *** note that calculating strand bias involves overwriting data structures, so we do that last + HashMap attributes = new HashMap(); + + int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); + + Set myAlleles = new HashSet(vc.getAlleles()); + // strip out the alternate allele if it's a ref call + if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { + myAlleles = new HashSet(1); + myAlleles.add(vc.getReference()); + } + VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc, + myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes, vc.getReferenceBaseForIndel()); + + return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); + } + private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // TODO - temp fix until we can deal with extended events properly // for indels, stop location is one more than ref allele length From 166174a551860fa18e8092f3dbe56e0ef28eb884 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 24 Oct 2011 14:04:53 -0400 Subject: [PATCH 18/34] Google caliper example execution script -- FragmentPileup with final performance testing --- .../sting/utils/pileup/FragmentPileup.java | 93 +++++++++++++++++-- .../utils/pileup/FragmentPileupBenchmark.java | 34 ++++--- 2 files changed, 105 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index f9a94989b..e811db355 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -30,6 +30,13 @@ public class FragmentPileup { Collection oneReadPile = null; Collection twoReadPile = null; + public enum FragmentMatchingAlgorithm { + ORIGINAL, + FAST_V1, + skipNonOverlapping, + skipNonOverlappingNotLazy + } + /** * Create a new Fragment-based pileup from the standard read-based pileup * @param pileup @@ -39,15 +46,17 @@ public class FragmentPileup { fastNewCalculation(pileup); } - protected FragmentPileup(ReadBackedPileup pileup, boolean useOldAlgorithm) { - if ( useOldAlgorithm ) - oldSlowCalculation(pileup); - else - fastNewCalculation(pileup); + protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) { + switch ( algorithm ) { + case ORIGINAL: oldSlowCalculation(pileup); break; + case FAST_V1: fastNewCalculation(pileup); break; + case skipNonOverlapping: skipNonOverlapping(pileup); break; + case skipNonOverlappingNotLazy: skipNonOverlappingNotLazy(pileup); break; + } } private final void oldSlowCalculation(final ReadBackedPileup pileup) { - final Map nameMap = new HashMap(); + final Map nameMap = new HashMap(pileup.size()); // build an initial map, grabbing all of the multi-read fragments for ( final PileupElement p : pileup ) { @@ -104,7 +113,6 @@ public class FragmentPileup { break; } - // in this case we need to see if our mate is already present, and if so // grab the read from the list case RIGHT_MAYBE: { @@ -120,9 +128,74 @@ public class FragmentPileup { } } - if ( nameMap != null && ! nameMap.isEmpty() ) // could be slightly more optimally - for ( final PileupElement p : nameMap.values() ) - addToOnePile(p); + if ( nameMap != null && ! nameMap.isEmpty() ) { + if ( oneReadPile == null ) + oneReadPile = nameMap.values(); + else + oneReadPile.addAll(nameMap.values()); + } + } + + /** + * @param pileup + */ + private final void skipNonOverlappingNotLazy(final ReadBackedPileup pileup) { + oneReadPile = new ArrayList(pileup.size()); + twoReadPile = new ArrayList(); + final Map nameMap = new HashMap(pileup.size()); + + // build an initial map, grabbing all of the multi-read fragments + for ( final PileupElement p : pileup ) { + // if we know that this read won't overlap its mate, or doesn't have one, jump out early + final SAMRecord read = p.getRead(); + final int mateStart = read.getMateAlignmentStart(); + if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { + oneReadPile.add(p); + } else { + final String readName = p.getRead().getReadName(); + final PileupElement pe1 = nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + twoReadPile.add(new TwoReadPileupElement(pe1, p)); + nameMap.remove(readName); + } else { + nameMap.put(readName, p); + } + } + } + + oneReadPile.addAll(nameMap.values()); + } + + private final void skipNonOverlapping(final ReadBackedPileup pileup) { + Map nameMap = null; + + // build an initial map, grabbing all of the multi-read fragments + for ( final PileupElement p : pileup ) { + // if we know that this read won't overlap its mate, or doesn't have one, jump out early + final SAMRecord read = p.getRead(); + final int mateStart = read.getMateAlignmentStart(); + if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { + if ( oneReadPile == null ) oneReadPile = new ArrayList(pileup.size()); + oneReadPile.add(p); + } else { + final String readName = p.getRead().getReadName(); + final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + if ( twoReadPile == null ) twoReadPile = new ArrayList(); + twoReadPile.add(new TwoReadPileupElement(pe1, p)); + nameMap.remove(readName); + } else { + nameMap = addToNameMap(nameMap, p); + } + } + } + + if ( oneReadPile == null ) + oneReadPile = nameMap == null ? Collections.emptyList() : nameMap.values(); + else if ( nameMap != null ) + oneReadPile.addAll(nameMap.values()); } private final Map addToNameMap(Map map, final PileupElement p) { diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java index fe1ca305f..6a5378d46 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java @@ -39,16 +39,17 @@ import java.util.*; * Caliper microbenchmark of fragment pileup */ public class FragmentPileupBenchmark extends SimpleBenchmark { - final int N_PILEUPS_TO_GENERATE = 100; - List pileups = new ArrayList(N_PILEUPS_TO_GENERATE); + List pileups; - @Param({"10", "100", "1000"}) // , "10000"}) + @Param({"0", "4", "30", "150", "1000"}) int pileupSize; // set automatically by framework - @Param({"150", "400"}) + @Param({"200", "400"}) int insertSize; // set automatically by framework @Override protected void setUp() { + final int nPileupsToGenerate = 100; + pileups = new ArrayList(nPileupsToGenerate); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); GenomeLocParser genomeLocParser; genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); @@ -61,7 +62,7 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { final boolean leftIsNegative = false; final int insertSizeVariation = insertSize / 10; - for ( int pileupN = 0; pileupN < N_PILEUPS_TO_GENERATE; pileupN++ ) { + for ( int pileupN = 0; pileupN < nPileupsToGenerate; pileupN++ ) { List pileupElements = new ArrayList(); for ( int i = 0; i < pileupSize / 2; i++ ) { final String readName = "read" + i; @@ -87,19 +88,28 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { } } - public void timeNaiveNameMatch(int rep) { + private void run(int rep, FragmentPileup.FragmentMatchingAlgorithm algorithm) { int nFrags = 0; for ( int i = 0; i < rep; i++ ) { for ( ReadBackedPileup rbp : pileups ) - nFrags += new FragmentPileup(rbp, true).getTwoReadPileup().size(); + nFrags += new FragmentPileup(rbp, algorithm).getTwoReadPileup().size(); } } - public void timeFastNameMatch(int rep) { - int nFrags = 0; - for ( int i = 0; i < rep; i++ ) - for ( ReadBackedPileup rbp : pileups ) - nFrags += new FragmentPileup(rbp, false).getTwoReadPileup().size(); + public void timeOriginal(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL); + } + + public void timeFullOverlapPotential(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.FAST_V1); + } + + public void timeSkipNonOverlapping(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping); + } + + public void timeSkipNonOverlappingNotLazy(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlappingNotLazy); } public static void main(String[] args) { From 502592671d7bfdbb9cff0575e94ca3c0946d000d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 24 Oct 2011 14:40:05 -0400 Subject: [PATCH 19/34] Cleanup FragmentPileup before main repo commit -- removed intermiate functions. Now only original version and best optimized new version remain -- Moved general artificial read backed pileup creation code into ArtificialSamUtils --- .../sting/utils/pileup/FragmentPileup.java | 163 +++--------------- .../sting/utils/sam/ArtificialSAMUtils.java | 91 +++++++++- .../sting/utils/sam/ReadUtils.java | 48 ------ .../utils/pileup/FragmentPileupBenchmark.java | 40 +---- .../utils/pileup/FragmentPileupUnitTest.java | 33 +--- 5 files changed, 116 insertions(+), 259 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index e811db355..4eda7c7cd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -14,9 +14,6 @@ import java.util.*; * * Based on the original code by E. Banks * - * TODO -- technically we could generalize this code to support a pseudo-duplicate marking - * TODO -- algorithm that could collect all duplicates into a single super pileup element - * * Oct 21: note that the order of the oneReadPileup and twoReadPileups are not * defined. The algorithms that produce these lists are in fact producing * lists of Pileup elements *NOT* sorted by alignment start position of the underlying @@ -30,11 +27,9 @@ public class FragmentPileup { Collection oneReadPile = null; Collection twoReadPile = null; - public enum FragmentMatchingAlgorithm { + protected enum FragmentMatchingAlgorithm { ORIGINAL, - FAST_V1, skipNonOverlapping, - skipNonOverlappingNotLazy } /** @@ -42,16 +37,14 @@ public class FragmentPileup { * @param pileup */ public FragmentPileup(ReadBackedPileup pileup) { - //oldSlowCalculation(pileup); - fastNewCalculation(pileup); + skipNonOverlapping(pileup); } + /** For performance testing only */ protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) { switch ( algorithm ) { case ORIGINAL: oldSlowCalculation(pileup); break; - case FAST_V1: fastNewCalculation(pileup); break; case skipNonOverlapping: skipNonOverlapping(pileup); break; - case skipNonOverlappingNotLazy: skipNonOverlappingNotLazy(pileup); break; } } @@ -76,58 +69,36 @@ public class FragmentPileup { oneReadPile = nameMap.values(); } - /** - * @param pileup - */ - private final void fastNewCalculation(final ReadBackedPileup pileup) { - Map nameMap = null; // lazy initialization + private final void skipNonOverlapping(final ReadBackedPileup pileup) { + Map nameMap = null; + // build an initial map, grabbing all of the multi-read fragments for ( final PileupElement p : pileup ) { final SAMRecord read = p.getRead(); + final int mateStart = read.getMateAlignmentStart(); - switch (ReadUtils.readMightOverlapMate(read) ) { - // we know for certain this read doesn't have an overlapping mate - case NO: { - addToOnePile(p); - break; - } - - // we know that we overlap our mate, so put the read in the nameMap in - // case our mate shows up - case LEFT_YES: { - nameMap = addToNameMap(nameMap, p); - break; - } - - // read starts at the same position, so we are looking at either the first or - // the second read. In the first, add it to the map, in the second grab it - // from the map and create a fragment - case SAME_START: { - final PileupElement pe1 = getFromNameMap(nameMap, p); - if ( pe1 != null ) { - addToTwoPile(pe1, p); - nameMap.remove(p.getRead().getReadName()); - } else { - nameMap = addToNameMap(nameMap, p); - } - break; - } - - // in this case we need to see if our mate is already present, and if so - // grab the read from the list - case RIGHT_MAYBE: { - final PileupElement pe1 = getFromNameMap(nameMap, p); - if ( pe1 != null ) { - addToTwoPile(pe1, p); - nameMap.remove(p.getRead().getReadName()); - } else { - addToOnePile(p); - } - break; + if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { + // if we know that this read won't overlap its mate, or doesn't have one, jump out early + if ( oneReadPile == null ) oneReadPile = new ArrayList(pileup.size()); // lazy init + oneReadPile.add(p); + } else { + // the read might overlap it's mate, or is the rightmost read of a pair + final String readName = p.getRead().getReadName(); + final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + if ( twoReadPile == null ) twoReadPile = new ArrayList(); // lazy init + twoReadPile.add(new TwoReadPileupElement(pe1, p)); + nameMap.remove(readName); + } else { + if ( nameMap == null ) nameMap = new HashMap(pileup.size()); // lazy init + nameMap.put(readName, p); } } } + // add all of the reads that are potentially overlapping but whose mate never showed + // up to the oneReadPile if ( nameMap != null && ! nameMap.isEmpty() ) { if ( oneReadPile == null ) oneReadPile = nameMap.values(); @@ -136,90 +107,6 @@ public class FragmentPileup { } } - /** - * @param pileup - */ - private final void skipNonOverlappingNotLazy(final ReadBackedPileup pileup) { - oneReadPile = new ArrayList(pileup.size()); - twoReadPile = new ArrayList(); - final Map nameMap = new HashMap(pileup.size()); - - // build an initial map, grabbing all of the multi-read fragments - for ( final PileupElement p : pileup ) { - // if we know that this read won't overlap its mate, or doesn't have one, jump out early - final SAMRecord read = p.getRead(); - final int mateStart = read.getMateAlignmentStart(); - if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { - oneReadPile.add(p); - } else { - final String readName = p.getRead().getReadName(); - final PileupElement pe1 = nameMap.get(readName); - if ( pe1 != null ) { - // assumes we have at most 2 reads per fragment - twoReadPile.add(new TwoReadPileupElement(pe1, p)); - nameMap.remove(readName); - } else { - nameMap.put(readName, p); - } - } - } - - oneReadPile.addAll(nameMap.values()); - } - - private final void skipNonOverlapping(final ReadBackedPileup pileup) { - Map nameMap = null; - - // build an initial map, grabbing all of the multi-read fragments - for ( final PileupElement p : pileup ) { - // if we know that this read won't overlap its mate, or doesn't have one, jump out early - final SAMRecord read = p.getRead(); - final int mateStart = read.getMateAlignmentStart(); - if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { - if ( oneReadPile == null ) oneReadPile = new ArrayList(pileup.size()); - oneReadPile.add(p); - } else { - final String readName = p.getRead().getReadName(); - final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName); - if ( pe1 != null ) { - // assumes we have at most 2 reads per fragment - if ( twoReadPile == null ) twoReadPile = new ArrayList(); - twoReadPile.add(new TwoReadPileupElement(pe1, p)); - nameMap.remove(readName); - } else { - nameMap = addToNameMap(nameMap, p); - } - } - } - - if ( oneReadPile == null ) - oneReadPile = nameMap == null ? Collections.emptyList() : nameMap.values(); - else if ( nameMap != null ) - oneReadPile.addAll(nameMap.values()); - } - - private final Map addToNameMap(Map map, final PileupElement p) { - if ( map == null ) map = new HashMap(); - map.put(p.getRead().getReadName(), p); - return map; - } - - private final PileupElement getFromNameMap(Map map, final PileupElement p) { - return map == null ? null : map.get(p.getRead().getReadName()); - } - - - private final void addToOnePile(final PileupElement p) { - if ( oneReadPile == null ) oneReadPile = new ArrayList(); - oneReadPile.add(p); - } - - private final void addToTwoPile(final PileupElement p1, final PileupElement p2) { - // assumes we have at most 2 reads per fragment - if ( twoReadPile == null ) twoReadPile = new ArrayList(); - twoReadPile.add(new TwoReadPileupElement(p1, p2)); - } - /** * Gets the pileup elements containing two reads, in no particular order * diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 4eba32383..1b3641128 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -2,11 +2,15 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.io.File; -import java.util.ArrayList; -import java.util.List; +import java.util.*; /** * @author aaron @@ -29,7 +33,7 @@ public class ArtificialSAMUtils { File outFile = new File(filename); SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, true, outFile); - + for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) { for (int readNumber = 1; readNumber < readsPerChomosome; readNumber++) { out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, DEFAULT_READ_LENGTH)); @@ -145,7 +149,7 @@ public class ArtificialSAMUtils { */ public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) { if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) || - (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) + (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(name); @@ -197,6 +201,37 @@ public class ArtificialSAMUtils { return rec; } + public final static List createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { + SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen); + SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen); + + left.setReadPairedFlag(true); + right.setReadPairedFlag(true); + + left.setProperPairFlag(true); + right.setProperPairFlag(true); + + left.setFirstOfPairFlag(leftIsFirst); + right.setFirstOfPairFlag(! leftIsFirst); + + left.setReadNegativeStrandFlag(leftIsNegative); + left.setMateNegativeStrandFlag(!leftIsNegative); + right.setReadNegativeStrandFlag(!leftIsNegative); + right.setMateNegativeStrandFlag(leftIsNegative); + + left.setMateAlignmentStart(right.getAlignmentStart()); + right.setMateAlignmentStart(left.getAlignmentStart()); + + left.setMateReferenceIndex(0); + right.setMateReferenceIndex(0); + + int isize = rightStart + readLen - leftStart; + left.setInferredInsertSize(isize); + right.setInferredInsertSize(-isize); + + return Arrays.asList(left, right); + } + /** * create an iterator containing the specified read piles * @@ -258,4 +293,52 @@ public class ArtificialSAMUtils { return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); } + + private final static int ranIntInclusive(Random ran, int start, int stop) { + final int range = stop - start; + return ran.nextInt(range) + start; + } + + /** + * Creates a read backed pileup containing up to pileupSize reads at refID 0 from header at loc with + * reads created that have readLen bases. Pairs are sampled from a gaussian distribution with mean insert + * size of insertSize and variation of insertSize / 10. The first read will be in the pileup, and the second + * may be, depending on where this sampled insertSize puts it. + * @param header + * @param loc + * @param readLen + * @param insertSize + * @param pileupSize + * @return + */ + public static ReadBackedPileup createReadBackedPileup(final SAMFileHeader header, final GenomeLoc loc, final int readLen, final int insertSize, final int pileupSize) { + final Random ran = new Random(); + final boolean leftIsFirst = true; + final boolean leftIsNegative = false; + final int insertSizeVariation = insertSize / 10; + final int pos = loc.getStart(); + + final List pileupElements = new ArrayList(); + for ( int i = 0; i < pileupSize / 2; i++ ) { + final String readName = "read" + i; + final int leftStart = ranIntInclusive(ran, 1, pos); + final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize); + final int rightStart = leftStart + fragmentSize - readLen; + + if ( rightStart <= 0 ) continue; + + List pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + final SAMRecord left = pair.get(0); + final SAMRecord right = pair.get(1); + + pileupElements.add(new PileupElement(left, pos - leftStart)); + + if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) { + pileupElements.add(new PileupElement(right, pos - rightStart)); + } + } + + Collections.sort(pileupElements); + return new ReadBackedPileupImpl(loc, pileupElements); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index f093e6539..f8e4927ed 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -214,54 +214,6 @@ public class ReadUtils { return state; } - /** - * s1 e1 - * |-----------------------> [record in hand] - * s2 - * <-----------------------| - * - * s1, e1, and s2 are all in the record. Assuming that s1 < s2 (we are the left most read), - * we can compute whether we overlap with our mate by seeing if s2 <= e1 or no. If e1 < - * s2 then we known that we cannot over. - * - * If we are looking at the right read - * - * s1 - * |-----------------------> - * s2 e2 - * <-----------------------| [record in hand] - * - * we know the position of s1 and s2, but we don't know e1, so we cannot tell if we - * overlap with our mate or not, so in this case we return MAYBE. - * - * Note that if rec has an unmapped mate or is unpaired we certainly know the answer - * - * @param rec - * @return - */ - public static ReadOverlapsMateType readMightOverlapMate(final SAMRecord rec) { - if ( ! rec.getReadPairedFlag() || rec.getMateUnmappedFlag() ) { - return ReadOverlapsMateType.NO; - } else { // read is actually paired - final int recStart = rec.getAlignmentStart(); - final int recEnd = rec.getAlignmentEnd(); - final int mateStart = rec.getMateAlignmentStart(); - - if ( recStart < mateStart ) { - // we are the left most read - return mateStart <= recEnd ? ReadOverlapsMateType.LEFT_YES: ReadOverlapsMateType.NO; - } else if ( recStart == mateStart ) { - // we are the left most read - return ReadOverlapsMateType.SAME_START; - } else { - // we are the right most read, so we cannot tell - return ReadOverlapsMateType.RIGHT_MAYBE; - } - } - } - - public enum ReadOverlapsMateType { LEFT_YES, NO, SAME_START, RIGHT_MAYBE } - private static Pair getAdaptorBoundaries(SAMRecord rec, int adaptorLength) { int isize = rec.getInferredInsertSize(); if ( isize == 0 ) diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java index 6a5378d46..8b797def4 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java @@ -53,38 +53,12 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); GenomeLocParser genomeLocParser; genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); - final int pos = 50; - GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", pos); - - final Random ran = new Random(); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 50); final int readLen = 100; - final boolean leftIsFirst = true; - final boolean leftIsNegative = false; - final int insertSizeVariation = insertSize / 10; for ( int pileupN = 0; pileupN < nPileupsToGenerate; pileupN++ ) { - List pileupElements = new ArrayList(); - for ( int i = 0; i < pileupSize / 2; i++ ) { - final String readName = "read" + i; - final int leftStart = new Random().nextInt(49) + 1; - final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize); - final int rightStart = leftStart + fragmentSize - readLen; - - if ( rightStart <= 0 ) continue; - - List pair = FragmentPileupUnitTest.createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); - SAMRecord left = pair.get(0); - SAMRecord right = pair.get(1); - - pileupElements.add(new PileupElement(left, pos - leftStart)); - - if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) { - pileupElements.add(new PileupElement(right, pos - rightStart)); - } - } - - Collections.sort(pileupElements); - pileups.add(new ReadBackedPileupImpl(loc, pileupElements)); + ReadBackedPileup rbp = ArtificialSAMUtils.createReadBackedPileup(header, loc, readLen, insertSize, pileupSize); + pileups.add(rbp); } } @@ -100,18 +74,10 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL); } - public void timeFullOverlapPotential(int rep) { - run(rep, FragmentPileup.FragmentMatchingAlgorithm.FAST_V1); - } - public void timeSkipNonOverlapping(int rep) { run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping); } - public void timeSkipNonOverlappingNotLazy(int rep) { - run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlappingNotLazy); - } - public static void main(String[] args) { CaliperMain.main(FragmentPileupBenchmark.class, args); } diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java index be4ecc0c6..c42c01c65 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java @@ -43,44 +43,13 @@ import java.util.*; public class FragmentPileupUnitTest extends BaseTest { private static SAMFileHeader header; - public final static List createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { - SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen); - SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen); - - left.setReadPairedFlag(true); - right.setReadPairedFlag(true); - - left.setProperPairFlag(true); - right.setProperPairFlag(true); - - left.setFirstOfPairFlag(leftIsFirst); - right.setFirstOfPairFlag(! leftIsFirst); - - left.setReadNegativeStrandFlag(leftIsNegative); - left.setMateNegativeStrandFlag(!leftIsNegative); - right.setReadNegativeStrandFlag(!leftIsNegative); - right.setMateNegativeStrandFlag(leftIsNegative); - - left.setMateAlignmentStart(right.getAlignmentStart()); - right.setMateAlignmentStart(left.getAlignmentStart()); - - left.setMateReferenceIndex(0); - right.setMateReferenceIndex(0); - - int isize = rightStart + readLen - leftStart; - left.setInferredInsertSize(isize); - right.setInferredInsertSize(-isize); - - return Arrays.asList(left, right); - } - private class FragmentPileupTest extends TestDataProvider { List states = new ArrayList(); private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); - List pair = createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + List pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); SAMRecord left = pair.get(0); SAMRecord right = pair.get(1); From 89a581a66f2c6ae1437301ffa6ab7243ebc5ca32 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Fri, 14 Oct 2011 12:06:41 -0400 Subject: [PATCH 20/34] Added ability to specify arguments in files via -args/--arg_file Pushing back downsample and read filter args so they show up in getApproximateCommandLineArgs() --- .../sting/commandline/ArgumentMatch.java | 83 ++++++++-------- .../sting/commandline/ArgumentMatchSite.java | 76 ++++++++++++++ .../commandline/ArgumentMatchSource.java | 98 +++++++++++++++++++ .../commandline/ArgumentMatchSourceType.java | 32 ++++++ .../sting/commandline/ArgumentMatches.java | 16 +-- .../sting/commandline/CommandLineProgram.java | 14 +-- .../sting/commandline/ParsingEngine.java | 93 +++++++++++++++--- .../sting/commandline/ParsingMethod.java | 6 +- .../sting/gatk/GenomeAnalysisEngine.java | 31 ++++-- .../arguments/GATKArgumentCollection.java | 16 ++- .../sting/utils/help/HelpFormatter.java | 33 +++++-- .../ArgumentMatchSiteUnitTest.java | 79 +++++++++++++++ .../ArgumentMatchSourceUnitTest.java | 98 +++++++++++++++++++ .../commandline/ParsingEngineUnitTest.java | 26 +++++ 14 files changed, 610 insertions(+), 91 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java create mode 100644 public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java create mode 100644 public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index 351583c07..c0823e5c5 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable { /** * Maps indices of command line arguments to values paired with that argument. */ - public final SortedMap> indices = new TreeMap>(); + public final SortedMap> sites = new TreeMap>(); /** * An ordered, freeform collection of tags. @@ -72,32 +72,32 @@ public class ArgumentMatch implements Iterable { } /** - * A simple way of indicating that an argument with the given label and definition exists at this index. + * A simple way of indicating that an argument with the given label and definition exists at this site. * @param label Label of the argument match. Must not be null. * @param definition The associated definition, if one exists. May be null. - * @param index Position of the argument. Must not be null. + * @param site Position of the argument. Must not be null. * @param tags ordered freeform text tags associated with this argument. */ - public ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final Tags tags) { - this( label, definition, index, null, tags ); + public ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final Tags tags) { + this( label, definition, site, null, tags ); } /** - * A simple way of indicating that an argument with the given label and definition exists at this index. + * A simple way of indicating that an argument with the given label and definition exists at this site. * @param label Label of the argument match. Must not be null. * @param definition The associated definition, if one exists. May be null. - * @param index Position of the argument. Must not be null. + * @param site Position of the argument. Must not be null. * @param value Value for the argument at this position. * @param tags ordered freeform text tags associated with this argument. */ - private ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final String value, final Tags tags) { + private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) { this.label = label; this.definition = definition; ArrayList values = new ArrayList(); if( value != null ) values.add(value); - indices.put(index,values ); + sites.put(site,values ); this.tags = tags; } @@ -117,7 +117,7 @@ public class ArgumentMatch implements Iterable { ArgumentMatch otherArgumentMatch = (ArgumentMatch)other; return this.definition.equals(otherArgumentMatch.definition) && this.label.equals(otherArgumentMatch.label) && - this.indices.equals(otherArgumentMatch.indices) && + this.sites.equals(otherArgumentMatch.sites) && this.tags.equals(otherArgumentMatch.tags); } @@ -129,16 +129,17 @@ public class ArgumentMatch implements Iterable { * @param key Key which specifies the transform. * @return A variant of this ArgumentMatch with all keys transformed. */ + @SuppressWarnings("unchecked") ArgumentMatch transform(Multiplexer multiplexer, Object key) { - SortedMap> newIndices = new TreeMap>(); - for(Map.Entry> index: indices.entrySet()) { + SortedMap> newIndices = new TreeMap>(); + for(Map.Entry> site: sites.entrySet()) { List newEntries = new ArrayList(); - for(String entry: index.getValue()) + for(String entry: site.getValue()) newEntries.add(multiplexer.transformArgument(key,entry)); - newIndices.put(index.getKey(),newEntries); + newIndices.put(site.getKey(),newEntries); } ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition); - newArgumentMatch.indices.putAll(newIndices); + newArgumentMatch.sites.putAll(newIndices); return newArgumentMatch; } @@ -157,9 +158,9 @@ public class ArgumentMatch implements Iterable { public Iterator iterator() { return new Iterator() { /** - * Iterate over each the available index. + * Iterate over each the available site. */ - private Iterator indexIterator = null; + private Iterator siteIterator = null; /** * Iterate over each available token. @@ -167,9 +168,9 @@ public class ArgumentMatch implements Iterable { private Iterator tokenIterator = null; /** - * The next index to return. Null if none remain. + * The next site to return. Null if none remain. */ - Integer nextIndex = null; + ArgumentMatchSite nextSite = null; /** * The next token to return. Null if none remain. @@ -177,7 +178,7 @@ public class ArgumentMatch implements Iterable { String nextToken = null; { - indexIterator = indices.keySet().iterator(); + siteIterator = sites.keySet().iterator(); prepareNext(); } @@ -186,7 +187,7 @@ public class ArgumentMatch implements Iterable { * @return True if there's another token waiting in the wings. False otherwise. */ public boolean hasNext() { - return nextToken != null; + return nextToken != null; } /** @@ -194,32 +195,32 @@ public class ArgumentMatch implements Iterable { * @return The next ArgumentMatch in the series. Should never be null. */ public ArgumentMatch next() { - if( nextIndex == null || nextToken == null ) + if( nextSite == null || nextToken == null ) throw new IllegalStateException( "No more ArgumentMatches are available" ); - ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags ); + ArgumentMatch match = new ArgumentMatch( label, definition, nextSite, nextToken, tags ); prepareNext(); return match; } /** * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available, - * initialize nextIndex / nextToken to null. + * initialize nextSite / nextToken to null. */ private void prepareNext() { if( tokenIterator != null && tokenIterator.hasNext() ) { nextToken = tokenIterator.next(); } else { - nextIndex = null; + nextSite = null; nextToken = null; // Do a nested loop. While more data is present in the inner loop, grab that data. // Otherwise, troll the outer iterator looking for more data. - while( indexIterator.hasNext() ) { - nextIndex = indexIterator.next(); - if( indices.get(nextIndex) != null ) { - tokenIterator = indices.get(nextIndex).iterator(); + while( siteIterator.hasNext() ) { + nextSite = siteIterator.next(); + if( sites.get(nextSite) != null ) { + tokenIterator = sites.get(nextSite).iterator(); if( tokenIterator.hasNext() ) { nextToken = tokenIterator.next(); break; @@ -245,29 +246,29 @@ public class ArgumentMatch implements Iterable { * @param other The other match to merge into. */ public void mergeInto( ArgumentMatch other ) { - indices.putAll(other.indices); + sites.putAll(other.sites); } /** * Associate a value with this merge maapping. - * @param index index of the command-line argument to which this value is mated. + * @param site site of the command-line argument to which this value is mated. * @param value Text representation of value to add. */ - public void addValue( int index, String value ) { - if( !indices.containsKey(index) || indices.get(index) == null ) - indices.put(index, new ArrayList() ); - indices.get(index).add(value); + public void addValue( ArgumentMatchSite site, String value ) { + if( !sites.containsKey(site) || sites.get(site) == null ) + sites.put(site, new ArrayList() ); + sites.get(site).add(value); } /** * Does this argument already have a value at the given site? * Arguments are only allowed to be single-valued per site, and * flags aren't allowed a value at all. - * @param index Index at which to check for values. + * @param site Site at which to check for values. * @return True if the argument has a value at the given site. False otherwise. */ - public boolean hasValueAtSite( int index ) { - return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag(); + public boolean hasValueAtSite( ArgumentMatchSite site ) { + return (sites.get(site) != null && sites.get(site).size() >= 1) || isArgumentFlag(); } /** @@ -276,9 +277,9 @@ public class ArgumentMatch implements Iterable { */ public List values() { List values = new ArrayList(); - for( int index: indices.keySet() ) { - if( indices.get(index) != null ) - values.addAll(indices.get(index)); + for( ArgumentMatchSite site: sites.keySet() ) { + if( sites.get(site) != null ) + values.addAll(sites.get(site)); } return values; } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java new file mode 100644 index 000000000..8a4120101 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSite.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +/** + * Which source and the index within the source where an argument match was found. + */ +public class ArgumentMatchSite implements Comparable { + private final ArgumentMatchSource source; + private final int index; + + public ArgumentMatchSite(ArgumentMatchSource source, int index) { + this.source = source; + this.index = index; + } + + public ArgumentMatchSource getSource() { + return source; + } + + public int getIndex() { + return index; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ArgumentMatchSite that = (ArgumentMatchSite) o; + + return (index == that.index) && (source == null ? that.source == null : source.equals(that.source)); + } + + @Override + public int hashCode() { + int result = source != null ? source.hashCode() : 0; + // Generated by intellij. No other special reason to this implementation. -ks + result = 31 * result + index; + return result; + } + + @Override + public int compareTo(ArgumentMatchSite that) { + int comp = this.source.compareTo(that.source); + if (comp != 0) + return comp; + + // Both files are the same. + if (this.index == that.index) + return 0; + return this.index < that.index ? -1 : 1; + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java new file mode 100644 index 000000000..ed2700006 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSource.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import java.io.File; + +/** + * Where an argument match originated, via the commandline or a file. + */ +public class ArgumentMatchSource implements Comparable { + public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null); + + private final ArgumentMatchSourceType type; + private final File file; + + /** + * Creates an argument match source from the specified file. + * @param file File specifying the arguments. Must not be null. + */ + public ArgumentMatchSource(File file) { + this(ArgumentMatchSourceType.File, file); + } + + private ArgumentMatchSource(ArgumentMatchSourceType type, File file) { + if (type == ArgumentMatchSourceType.File && file == null) + throw new IllegalArgumentException("An argument match source of type File cannot have a null file."); + this.type = type; + this.file = file; + } + + public ArgumentMatchSourceType getType() { + return type; + } + + public File getFile() { + return file; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ArgumentMatchSource that = (ArgumentMatchSource) o; + + return (type == that.type) && (file == null ? that.file == null : file.equals(that.file)); + } + + @Override + public int hashCode() { + int result = type != null ? type.hashCode() : 0; + result = 31 * result + (file != null ? file.hashCode() : 0); + return result; + } + + /** + * Compares two sources, putting the command line first, then files. + */ + @Override + public int compareTo(ArgumentMatchSource that) { + int comp = this.type.compareTo(that.type); + if (comp != 0) + return comp; + + File f1 = this.file; + File f2 = that.file; + + if ((f1 == null) ^ (f2 == null)) { + // If one of the files is null and the other is not + // put the null file first + return f1 == null ? -1 : 1; + } + + return f1 == null ? 0 : f1.compareTo(f2); + } +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java new file mode 100644 index 000000000..3ff6e21d4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatchSourceType.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +/** + * Type of where an argument match originated, via the commandline or a file. + */ +public enum ArgumentMatchSourceType { + CommandLine, File +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java index 52d3b8232..3da28c420 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatches.java @@ -37,7 +37,7 @@ public class ArgumentMatches implements Iterable { * Collection matches from argument definition to argument value. * Package protected access is deliberate. */ - Map argumentMatches = new TreeMap(); + Map argumentMatches = new TreeMap(); /** * Provide a place to put command-line argument values that don't seem to belong to @@ -80,7 +80,7 @@ public class ArgumentMatches implements Iterable { * @param site Site at which to check. * @return True if the site has a match. False otherwise. */ - boolean hasMatch( int site ) { + boolean hasMatch( ArgumentMatchSite site ) { return argumentMatches.containsKey( site ); } @@ -90,7 +90,7 @@ public class ArgumentMatches implements Iterable { * @return The match present at the given site. * @throws IllegalArgumentException if site does not contain a match. */ - ArgumentMatch getMatch( int site ) { + ArgumentMatch getMatch( ArgumentMatchSite site ) { if( !argumentMatches.containsKey(site) ) throw new IllegalArgumentException( "Site does not contain an argument: " + site ); return argumentMatches.get(site); @@ -107,6 +107,7 @@ public class ArgumentMatches implements Iterable { /** * Return all argument matches of this source. + * @param parsingEngine Parsing engine. * @param argumentSource Argument source to match. * @return List of all matches. */ @@ -167,6 +168,7 @@ public class ArgumentMatches implements Iterable { * TODO: Generify this. * @param multiplexer Multiplexer that controls the transformation process. * @param key Key which specifies the transform. + * @return new argument matches. */ ArgumentMatches transform(Multiplexer multiplexer, Object key) { ArgumentMatches newArgumentMatches = new ArgumentMatches(); @@ -187,15 +189,15 @@ public class ArgumentMatches implements Iterable { for( ArgumentMatch argumentMatch: getUniqueMatches() ) { if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) { argumentMatch.mergeInto( match ); - for( int index: match.indices.keySet() ) - argumentMatches.put( index, argumentMatch ); + for( ArgumentMatchSite site: match.sites.keySet() ) + argumentMatches.put( site, argumentMatch ); definitionExists = true; } } if( !definitionExists ) { - for( int index: match.indices.keySet() ) - argumentMatches.put( index, match ); + for( ArgumentMatchSite site: match.sites.keySet() ) + argumentMatches.put( site, match ); } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java index d88e7030e..bed1e710e 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java +++ b/public/java/src/org/broadinstitute/sting/commandline/CommandLineProgram.java @@ -35,10 +35,7 @@ import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.EnumSet; -import java.util.Locale; +import java.util.*; public abstract class CommandLineProgram { @@ -155,6 +152,7 @@ public abstract class CommandLineProgram { * * @param clp the command line program to execute * @param args the command line arguments passed in + * @param dryRun dry run * @throws Exception when an exception occurs */ @SuppressWarnings("unchecked") @@ -176,6 +174,8 @@ public abstract class CommandLineProgram { ParsingEngine parser = clp.parser = new ParsingEngine(clp); parser.addArgumentSource(clp.getClass()); + Map> parsedArgs; + // process the args if (clp.canAddArgumentsDynamically()) { // if the command-line program can toss in extra args, fetch them and reparse the arguments. @@ -196,14 +196,14 @@ public abstract class CommandLineProgram { Class[] argumentSources = clp.getArgumentSources(); for (Class argumentSource : argumentSources) parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); - parser.parse(args); + parsedArgs = parser.parse(args); if (isHelpPresent(parser)) printHelpAndExit(clp, parser); if ( ! dryRun ) parser.validate(); } else { - parser.parse(args); + parsedArgs = parser.parse(args); if ( ! dryRun ) { if (isHelpPresent(parser)) @@ -230,7 +230,7 @@ public abstract class CommandLineProgram { } // regardless of what happens next, generate the header information - HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); + HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), parsedArgs); // call the execute CommandLineProgram.result = clp.execute(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index fbf8c6516..c19960355 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.commandline; import com.google.java.contract.Requires; +import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.HelpFormatter; +import java.io.File; +import java.io.IOException; import java.lang.reflect.Field; import java.util.*; @@ -100,6 +103,8 @@ public class ParsingEngine { if(clp != null) argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors()); argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS); + + addArgumentSource(ParsingEngineArgumentFiles.class); } /** @@ -148,21 +153,43 @@ public class ParsingEngine { * command-line arguments to the arguments that are actually * required. * @param tokens Tokens passed on the command line. + * @return The parsed arguments by file. */ - public void parse( String[] tokens ) { + public SortedMap> parse( String[] tokens ) { argumentMatches = new ArgumentMatches(); + SortedMap> parsedArgs = new TreeMap>(); - int lastArgumentMatchSite = -1; + List cmdLineTokens = Arrays.asList(tokens); + parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs); - for( int i = 0; i < tokens.length; i++ ) { - String token = tokens[i]; + ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles(); + + // Load the arguments ONLY into the argument files. + // Validation may optionally run on the rest of the arguments. + loadArgumentsIntoObject(argumentFiles); + + for (File file: argumentFiles.files) { + List fileTokens = getArguments(file); + parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs); + } + + return parsedArgs; + } + + private void parse(ArgumentMatchSource matchSource, List tokens, + ArgumentMatches argumentMatches, SortedMap> parsedArgs) { + ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1); + + int i = 0; + for (String token: tokens) { // If the token is of argument form, parse it into its own argument match. // Otherwise, pair it with the most recently used argument discovered. + ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i); if( isArgumentForm(token) ) { - ArgumentMatch argumentMatch = parseArgument( token, i ); + ArgumentMatch argumentMatch = parseArgument( token, site ); if( argumentMatch != null ) { argumentMatches.mergeInto( argumentMatch ); - lastArgumentMatchSite = i; + lastArgumentMatchSite = site; } } else { @@ -170,10 +197,31 @@ public class ParsingEngine { !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token ); else - argumentMatches.MissingArgument.addValue( i, token ); + argumentMatches.MissingArgument.addValue( site, token ); } + i++; } + + parsedArgs.put(matchSource, tokens); + } + + private List getArguments(File file) { + try { + if (file.getAbsolutePath().endsWith(".list")) { + return getListArguments(file); + } + } catch (IOException e) { + throw new UserException.CouldNotReadInputFile(file, e); + } + throw new UserException.CouldNotReadInputFile(file, "file extension is not .list"); + } + + private List getListArguments(File file) throws IOException { + ArrayList argsList = new ArrayList(); + for (String line: FileUtils.readLines(file)) + argsList.addAll(Arrays.asList(Utils.escapeExpressions(line))); + return argsList; } public enum ValidationType { MissingRequiredArgument, @@ -494,7 +542,7 @@ public class ParsingEngine { * @param position The position of the token in question. * @return ArgumentMatch associated with this token, or null if no match exists. */ - private ArgumentMatch parseArgument( String token, int position ) { + private ArgumentMatch parseArgument( String token, ArgumentMatchSite position ) { if( !isArgumentForm(token) ) throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token ); @@ -579,9 +627,21 @@ class UnmatchedArgumentException extends ArgumentException { private static String formatArguments( ArgumentMatch invalidValues ) { StringBuilder sb = new StringBuilder(); - for( int index: invalidValues.indices.keySet() ) - for( String value: invalidValues.indices.get(index) ) { - sb.append( String.format("%nInvalid argument value '%s' at position %d.", value, index) ); + for( ArgumentMatchSite site: invalidValues.sites.keySet() ) + for( String value: invalidValues.sites.get(site) ) { + switch (site.getSource().getType()) { + case CommandLine: + sb.append( String.format("%nInvalid argument value '%s' at position %d.", + value, site.getIndex()) ); + break; + case File: + sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.", + value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) ); + break; + default: + throw new RuntimeException( String.format("Unexpected argument match source type: %s", + site.getSource().getType())); + } if(value != null && Utils.dupString(' ',value.length()).equals(value)) sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace."); } @@ -634,4 +694,13 @@ class UnknownEnumeratedValueException extends ArgumentException { private static String formatArguments(ArgumentDefinition definition, String argumentPassed) { return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions)); } -} \ No newline at end of file +} + +/** + * Container class to store the list of argument files. + * The files will be parsed after the command line arguments. + */ +class ParsingEngineArgumentFiles { + @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false) + public List files = new ArrayList(); +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java index a070cb5a1..452309e89 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingMethod.java @@ -68,7 +68,7 @@ public abstract class ParsingMethod { * @return An argument match. Definition field will be populated if a match was found or * empty if no appropriate definition could be found. */ - public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) { + public ArgumentMatch match( ArgumentDefinitions definitions, String token, ArgumentMatchSite position ) { // If the argument is valid, parse out the argument. Matcher matcher = pattern.matcher(token); @@ -102,9 +102,7 @@ public abstract class ParsingMethod { // Try to find a matching argument. If found, label that as the match. If not found, add the argument // with a null definition. - ArgumentMatch argumentMatch = new ArgumentMatch(argument,argumentDefinition,position,tags); - - return argumentMatch; + return new ArgumentMatch(argument,argumentDefinition,position,tags); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 7bc3daa9a..fb0dcc6cd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -220,12 +220,12 @@ public class GenomeAnalysisEngine { ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals); // execute the microscheduler, storing the results - Object result = microScheduler.execute(this.walker, shardStrategy); + return microScheduler.execute(this.walker, shardStrategy); //monitor.stop(); //logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed())); - return result; + //return result; } /** @@ -296,10 +296,14 @@ public class GenomeAnalysisEngine { else if(WalkerManager.getDownsamplingMethod(walker) != null) method = WalkerManager.getDownsamplingMethod(walker); else - method = argCollection.getDefaultDownsamplingMethod(); + method = GATKArgumentCollection.getDefaultDownsamplingMethod(); return method; } + protected void setDownsamplingMethod(DownsamplingMethod method) { + argCollection.setDownsamplingMethod(method); + } + public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); } public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); } @@ -389,7 +393,9 @@ public class GenomeAnalysisEngine { /** * Get the sharding strategy given a driving data source. * + * @param readsDataSource readsDataSource * @param drivingDataSource Data on which to shard. + * @param intervals intervals * @return the sharding strategy */ protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) { @@ -426,7 +432,7 @@ public class GenomeAnalysisEngine { return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region); } - ShardStrategy shardStrategy = null; + ShardStrategy shardStrategy; ShardStrategyFactory.SHATTER_STRATEGY shardType; long SHARD_SIZE = 100000L; @@ -435,6 +441,8 @@ public class GenomeAnalysisEngine { if (walker instanceof RodWalker) SHARD_SIZE *= 1000; if (intervals != null && !intervals.isEmpty()) { + if (readsDataSource == null) + throw new IllegalArgumentException("readsDataSource is null"); if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately."); @@ -498,7 +506,8 @@ public class GenomeAnalysisEngine { */ private void initializeTempDirectory() { File tempDir = new File(System.getProperty("java.io.tmpdir")); - tempDir.mkdirs(); + if (!tempDir.exists() && !tempDir.mkdirs()) + throw new UserException.BadTmpDir("Unable to create directory"); } /** @@ -729,6 +738,7 @@ public class GenomeAnalysisEngine { * @param reads Reads data source. * @param reference Reference data source. * @param rods a collection of the reference ordered data tracks + * @param manager manager */ private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection rods, RMDTrackBuilder manager) { if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null ) @@ -757,15 +767,22 @@ public class GenomeAnalysisEngine { /** * Gets a data source for the given set of reads. * + * @param argCollection arguments + * @param genomeLocParser parser + * @param refReader reader * @return A data source for the given set of reads. */ private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) { DownsamplingMethod method = getDownsamplingMethod(); + // Synchronize the method back into the collection so that it shows up when + // interrogating for the downsample method during command line recreation. + setDownsamplingMethod(method); + if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); - SAMDataSource dataSource = new SAMDataSource( + return new SAMDataSource( samReaderIDs, genomeLocParser, argCollection.useOriginalBaseQualities, @@ -781,14 +798,12 @@ public class GenomeAnalysisEngine { refReader, argCollection.defaultBaseQualities, !argCollection.disableLowMemorySharding); - return dataSource; } /** * Opens a reference sequence file paired with an index. Only public for testing purposes * * @param refFile Handle to a reference sequence file. Non-null. - * @return A thread-safe file wrapper. */ public void setReferenceDataSource(File refFile) { this.referenceDataSource = new ReferenceDataSource(refFile); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 486868dc2..18e71bc2b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -135,8 +135,8 @@ public class GATKArgumentCollection { /** * Gets the downsampling method explicitly specified by the user. If the user didn't specify - * a default downsampling mechanism, return null. - * @return The explicitly specified downsampling mechanism, or null if none exists. + * a default downsampling mechanism, return the default. + * @return The explicitly specified downsampling mechanism, or the default if none exists. */ public DownsamplingMethod getDownsamplingMethod() { if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null) @@ -146,6 +146,18 @@ public class GATKArgumentCollection { return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction); } + /** + * Set the downsampling method stored in the argument collection so that it is read back out when interrogating the command line arguments. + * @param method The downsampling mechanism. + */ + public void setDownsamplingMethod(DownsamplingMethod method) { + if (method == null) + throw new IllegalArgumentException("method is null"); + downsamplingType = method.type; + downsampleCoverage = method.toCoverage; + downsampleFraction = method.toFraction; + } + // -------------------------------------------------------------------------------------------------------------- // // BAQ arguments diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java index a9d71ef98..25ef8ccd2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpFormatter.java @@ -29,6 +29,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.ArgumentDefinition; import org.broadinstitute.sting.commandline.ArgumentDefinitionGroup; import org.broadinstitute.sting.commandline.ArgumentDefinitions; +import org.broadinstitute.sting.commandline.ArgumentMatchSource; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; @@ -47,6 +48,7 @@ public class HelpFormatter { /** * Prints the help, given a collection of argument definitions. + * @param applicationDetails Application details * @param argumentDefinitions Argument definitions for which help should be printed. */ public void printHelp( ApplicationDetails applicationDetails, ArgumentDefinitions argumentDefinitions ) { @@ -233,7 +235,7 @@ public class HelpFormatter { private List prepareArgumentGroups( ArgumentDefinitions argumentDefinitions ) { // Sort the list of argument definitions according to how they should be shown. // Put the sorted results into a new cloned data structure. - Comparator definitionComparator = new Comparator() { + Comparator definitionComparator = new Comparator() { public int compare( ArgumentDefinition lhs, ArgumentDefinition rhs ) { if( lhs.required && rhs.required ) return 0; if( lhs.required ) return -1; @@ -242,15 +244,15 @@ public class HelpFormatter { } }; - List argumentGroups = new ArrayList(); + List argumentGroups = new ArrayList(); for( ArgumentDefinitionGroup argumentGroup: argumentDefinitions.getArgumentDefinitionGroups() ) { - List sortedDefinitions = new ArrayList( argumentGroup.argumentDefinitions ); + List sortedDefinitions = new ArrayList( argumentGroup.argumentDefinitions ); Collections.sort( sortedDefinitions, definitionComparator ); argumentGroups.add( new ArgumentDefinitionGroup(argumentGroup.groupName,sortedDefinitions) ); } // Sort the argument groups themselves with main arguments first, followed by plugins sorted in name order. - Comparator groupComparator = new Comparator() { + Comparator groupComparator = new Comparator() { public int compare( ArgumentDefinitionGroup lhs, ArgumentDefinitionGroup rhs ) { if( lhs.groupName == null && rhs.groupName == null ) return 0; if( lhs.groupName == null ) return -1; @@ -271,9 +273,9 @@ public class HelpFormatter { * Generate a standard header for the logger * * @param applicationDetails details of the application to run. - * @param args the command line arguments passed in + * @param parsedArgs the command line arguments passed in */ - public static void generateHeaderInformation(ApplicationDetails applicationDetails, String[] args) { + public static void generateHeaderInformation(ApplicationDetails applicationDetails, Map> parsedArgs) { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); java.util.Date date = new java.util.Date(); @@ -283,11 +285,22 @@ public class HelpFormatter { logger.info(barrier); for (String headerLine : applicationDetails.applicationHeader) logger.info(headerLine); - String output = ""; - for (String str : args) { - output = output + str + " "; + logger.debug("Current directory: " + System.getProperty("user.dir")); + for (Map.Entry> entry: parsedArgs.entrySet()) { + ArgumentMatchSource matchSource = entry.getKey(); + final String sourceName; + switch (matchSource.getType()) { + case CommandLine: sourceName = "Program"; break; + case File: sourceName = matchSource.getFile().getPath(); break; + default: throw new RuntimeException("Unexpected argument match source type: " + matchSource.getType()); + } + + String output = sourceName + " Args:"; + for (String str : entry.getValue()) { + output = output + " " + str; + } + logger.info(output); } - logger.info("Program Args: " + output); logger.info("Date/Time: " + dateFormat.format(date)); logger.info(barrier); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java new file mode 100644 index 000000000..99d6b88f3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSiteUnitTest.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class ArgumentMatchSiteUnitTest { + @Test + public void testCommandLine() { + ArgumentMatchSite site = new ArgumentMatchSite(ArgumentMatchSource.COMMAND_LINE, 1); + Assert.assertEquals(site.getSource(), ArgumentMatchSource.COMMAND_LINE); + Assert.assertEquals(site.getIndex(), 1); + } + + @Test + public void testFile() { + ArgumentMatchSource source = new ArgumentMatchSource(new File("test")); + ArgumentMatchSite site = new ArgumentMatchSite(source, 1); + Assert.assertEquals(site.getSource(), source); + Assert.assertEquals(site.getIndex(), 1); + } + + @Test + public void testEquals() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSite site1 = new ArgumentMatchSite(cmdLine, 1); + ArgumentMatchSite site2 = new ArgumentMatchSite(cmdLine, 2); + + Assert.assertFalse(site1.equals(null)); + + Assert.assertTrue(site1.equals(site1)); + Assert.assertFalse(site1.equals(site2)); + + Assert.assertFalse(site2.equals(site1)); + Assert.assertTrue(site2.equals(site2)); + } + + @Test + public void testCompareTo() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSite site1 = new ArgumentMatchSite(cmdLine, 1); + ArgumentMatchSite site2 = new ArgumentMatchSite(cmdLine, 2); + + Assert.assertTrue(site1.compareTo(site1) == 0); + Assert.assertTrue(site1.compareTo(site2) < 0); + Assert.assertTrue(site2.compareTo(site1) > 0); + Assert.assertTrue(site2.compareTo(site2) == 0); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testCompareToNull() { + new ArgumentMatchSite(ArgumentMatchSource.COMMAND_LINE, 0).compareTo(null); + } +} diff --git a/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java new file mode 100644 index 000000000..4bc7eb822 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/commandline/ArgumentMatchSourceUnitTest.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class ArgumentMatchSourceUnitTest extends BaseTest { + @Test + public void testCommandLine() { + ArgumentMatchSource source = ArgumentMatchSource.COMMAND_LINE; + Assert.assertEquals(source.getType(), ArgumentMatchSourceType.CommandLine); + Assert.assertNull(source.getFile()); + } + + @Test + public void testFile() { + File f = new File("test"); + ArgumentMatchSource source = new ArgumentMatchSource(f); + Assert.assertEquals(source.getType(), ArgumentMatchSourceType.File); + Assert.assertEquals(source.getFile(), f); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testNullFile() { + new ArgumentMatchSource(null); + } + + @Test + public void testEquals() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + + Assert.assertFalse(cmdLine.equals(null)); + + Assert.assertTrue(cmdLine.equals(cmdLine)); + Assert.assertFalse(cmdLine.equals(fileA)); + Assert.assertFalse(cmdLine.equals(fileB)); + + Assert.assertFalse(fileA.equals(cmdLine)); + Assert.assertTrue(fileA.equals(fileA)); + Assert.assertFalse(fileA.equals(fileB)); + + Assert.assertFalse(fileB.equals(cmdLine)); + Assert.assertFalse(fileB.equals(fileA)); + Assert.assertTrue(fileB.equals(fileB)); + } + + @Test + public void testCompareTo() { + ArgumentMatchSource cmdLine = ArgumentMatchSource.COMMAND_LINE; + ArgumentMatchSource fileA = new ArgumentMatchSource(new File("a")); + ArgumentMatchSource fileB = new ArgumentMatchSource(new File("b")); + + Assert.assertTrue(cmdLine.compareTo(cmdLine) == 0); + Assert.assertTrue(cmdLine.compareTo(fileA) < 0); + Assert.assertTrue(cmdLine.compareTo(fileB) < 0); + + Assert.assertTrue(fileA.compareTo(cmdLine) > 0); + Assert.assertTrue(fileA.compareTo(fileA) == 0); + Assert.assertTrue(fileA.compareTo(fileB) < 0); + + Assert.assertTrue(fileB.compareTo(cmdLine) > 0); + Assert.assertTrue(fileB.compareTo(fileA) > 0); + Assert.assertTrue(fileB.compareTo(fileB) == 0); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testCompareToNull() { + ArgumentMatchSource.COMMAND_LINE.compareTo(null); + } +} diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index f04731214..87f0e6ff0 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.commandline; +import org.apache.commons.io.FileUtils; import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -34,6 +35,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import java.io.File; +import java.io.IOException; import java.util.List; import java.util.EnumSet; /** @@ -493,6 +496,7 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertNotNull(definition, "Invalid default argument name assigned"); } + @SuppressWarnings("unused") private class CamelCaseArgProvider { @Argument(doc="my arg") Integer myArg; @@ -507,6 +511,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.validate(); } + @SuppressWarnings("unused") private class BooleanArgProvider { @Argument(doc="my bool") boolean myBool; @@ -561,6 +566,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.validate(); } + @SuppressWarnings("unused") private class MutuallyExclusiveArgProvider { @Argument(doc="foo",exclusiveOf="bar") Integer foo; @@ -618,6 +624,7 @@ public class ParsingEngineUnitTest extends BaseTest { parsingEngine.addArgumentSource( MultipleArgumentCollectionProvider.class ); } + @SuppressWarnings("unused") private class MultipleArgumentCollectionProvider { @ArgumentCollection RequiredArgProvider rap1 = new RequiredArgProvider(); @@ -937,4 +944,23 @@ public class ParsingEngineUnitTest extends BaseTest { VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); parsingEngine.loadArgumentsIntoObject( argProvider ); } + + @Test + public void argumentListTest() throws IOException { + File argsFile = BaseTest.createTempFile("args.", ".list"); + try { + FileUtils.write(argsFile, "-I na12878.bam"); + final String[] commandLine = new String[] {"-args", argsFile.getPath()}; + parsingEngine.addArgumentSource(InputFileArgProvider.class); + parsingEngine.parse(commandLine); + parsingEngine.validate(); + + InputFileArgProvider argProvider = new InputFileArgProvider(); + parsingEngine.loadArgumentsIntoObject(argProvider); + + Assert.assertEquals(argProvider.inputFile, "na12878.bam", "Argument is not correctly initialized"); + } finally { + FileUtils.deleteQuietly(argsFile); + } + } } From fac9932938f1275015aa0b93a82fb9bd52dd391d Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Mon, 24 Oct 2011 15:49:02 -0400 Subject: [PATCH 21/34] Embedding gsalib source and queueJobReport R scripts in the dist and package jars. Moved gsalib and queueJobReport.R to embeddable namespaced locations. Updated packager dependencies/dir to add an @includes which filters the embedded fileset. RScriptExecutor can now JIT compiles the gsalib. RScriptExecutor uses ProcessController and sends the Rscript output to java's stdout when run under -l DEBUG. Refactored ProcessController and IOUtils from Queue to Sting Utils. Added more unit tests to ProcessController along with a utility class to hard stop OutputStreams at a specified byte count. Replaced uses of some IOUtils with Apache Commons IO. ShellJobRunner refactored to use direct ProcessController and now kills jobs on shutdown. Better QGraph responsiveness on shutdown by using Object.wait() instead of Thread.sleep(). --- build.xml | 138 +++-- .../sting/queue/util}/queueJobReport.R | 0 .../sting/utils/R}/gsalib/DESCRIPTION | 0 .../sting/utils/R}/gsalib/R/gsa.error.R | 0 .../sting/utils/R}/gsalib/R/gsa.getargs.R | 0 .../sting/utils/R}/gsalib/R/gsa.message.R | 0 .../sting/utils/R}/gsalib/R/gsa.plot.venn.R | 0 .../sting/utils/R}/gsalib/R/gsa.read.eval.R | 0 .../utils/R}/gsalib/R/gsa.read.gatkreport.R | 0 .../utils/R}/gsalib/R/gsa.read.squidmetrics.R | 0 .../sting/utils/R}/gsalib/R/gsa.read.vcf.R | 0 .../sting/utils/R}/gsalib/R/gsa.warn.R | 0 .../sting/utils/R}/gsalib/Read-and-delete-me | 0 .../utils/R}/gsalib/data/tearsheetdrop.jpg | Bin .../sting/utils/R}/gsalib/man/gsa.error.Rd | 0 .../sting/utils/R}/gsalib/man/gsa.getargs.Rd | 0 .../sting/utils/R}/gsalib/man/gsa.message.Rd | 0 .../utils/R}/gsalib/man/gsa.plot.venn.Rd | 0 .../utils/R}/gsalib/man/gsa.read.eval.Rd | 0 .../R}/gsalib/man/gsa.read.gatkreport.Rd | 0 .../R}/gsalib/man/gsa.read.squidmetrics.Rd | 0 .../sting/utils/R}/gsalib/man/gsa.read.vcf.Rd | 0 .../sting/utils/R}/gsalib/man/gsa.warn.Rd | 0 .../utils/R}/gsalib/man/gsalib-package.Rd | 0 .../sting/utils/R/RScriptExecutor.java | 129 ++++- .../sting/utils/R/RScriptLibrary.java | 59 ++ .../sting/utils/io/FileExtension.java | 36 ++ .../io/HardThresholdingOutputStream.java | 54 ++ .../sting/utils/io/IOUtils.java | 353 ++++++++++++ .../sting/utils/io/Resource.java | 53 ++ .../utils/runtime/CapturedStreamOutput.java | 133 +++++ .../utils/runtime/InputStreamSettings.java | 115 ++++ .../utils/runtime/OutputStreamSettings.java | 126 +++++ .../utils/runtime/ProcessController.java | 363 ++++++++++++ .../sting/utils/runtime/ProcessOutput.java | 56 ++ .../sting/utils/runtime/ProcessSettings.java | 140 +++++ .../sting/utils/runtime/StreamLocation.java | 32 ++ .../sting/utils/runtime/StreamOutput.java | 68 +++ .../sting/utils/R/RScriptLibraryUnitTest.java | 46 ++ .../sting/utils/io/IOUtilsUnitTest.java | 197 +++++++ .../runtime/ProcessControllerUnitTest.java | 517 ++++++++++++++++++ public/packages/CreatePackager.xsl | 20 +- public/packages/GATKEngine.xml | 2 + public/packages/Queue.xml | 4 + .../sting/queue/QCommandLine.scala | 26 +- .../sting/queue/QScriptManager.scala | 16 +- .../queue/engine/CommandLineJobRunner.scala | 5 +- .../sting/queue/engine/FunctionEdge.scala | 20 +- .../sting/queue/engine/InProcessRunner.scala | 6 +- .../sting/queue/engine/QGraph.scala | 24 +- .../queue/engine/drmaa/DrmaaJobManager.scala | 2 +- .../queue/engine/drmaa/DrmaaJobRunner.scala | 20 +- .../queue/engine/shell/ShellJobManager.scala | 1 + .../queue/engine/shell/ShellJobRunner.scala | 51 +- .../extensions/gatk/GATKScatterFunction.scala | 2 +- .../sting/queue/extensions/gatk/RodBind.scala | 2 +- .../queue/extensions/gatk/TaggedFile.scala | 2 +- .../function/JavaCommandLineFunction.scala | 2 +- .../sting/queue/function/QFunction.scala | 1 + .../scattergather/GatherFunction.scala | 5 +- .../ScatterGatherableFunction.scala | 19 +- .../sting/queue/util/CommandLineJob.scala | 51 -- .../sting/queue/util/FileExtension.scala | 15 - .../sting/queue/util/IOUtils.scala | 253 --------- .../sting/queue/util/JobExitException.scala | 11 - .../sting/queue/util/ProcessController.scala | 369 ------------- .../sting/queue/util/QJobReport.scala | 14 +- .../sting/queue/util/ShellJob.scala | 27 - .../sting/queue/pipeline/PipelineTest.scala | 7 +- .../sting/queue/util/IOUtilsUnitTest.scala | 122 ----- .../sting/queue/util/ShellJobUnitTest.scala | 73 --- 71 files changed, 2676 insertions(+), 1111 deletions(-) rename public/R/{ => scripts/org/broadinstitute/sting/queue/util}/queueJobReport.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/DESCRIPTION (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.error.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.getargs.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.message.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.plot.venn.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.read.eval.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.read.gatkreport.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.read.squidmetrics.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.read.vcf.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/R/gsa.warn.R (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/Read-and-delete-me (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/data/tearsheetdrop.jpg (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.error.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.getargs.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.message.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.plot.venn.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.read.eval.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.read.gatkreport.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.read.squidmetrics.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.read.vcf.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsa.warn.Rd (100%) rename public/R/src/{ => org/broadinstitute/sting/utils/R}/gsalib/man/gsalib-package.Rd (100%) create mode 100644 public/java/src/org/broadinstitute/sting/utils/R/RScriptLibrary.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/io/FileExtension.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/io/HardThresholdingOutputStream.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/io/Resource.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/CapturedStreamOutput.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/InputStreamSettings.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/OutputStreamSettings.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/ProcessController.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/ProcessOutput.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/StreamLocation.java create mode 100755 public/java/src/org/broadinstitute/sting/utils/runtime/StreamOutput.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/R/RScriptLibraryUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/runtime/ProcessControllerUnitTest.java delete mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/CommandLineJob.scala delete mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/FileExtension.scala delete mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala delete mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/JobExitException.scala delete mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/ProcessController.scala delete mode 100755 public/scala/src/org/broadinstitute/sting/queue/util/ShellJob.scala delete mode 100644 public/scala/test/org/broadinstitute/sting/queue/util/IOUtilsUnitTest.scala delete mode 100644 public/scala/test/org/broadinstitute/sting/queue/util/ShellJobUnitTest.scala diff --git a/build.xml b/build.xml index 446982a44..6ca959c38 100644 --- a/build.xml +++ b/build.xml @@ -28,6 +28,8 @@ + + @@ -35,18 +37,25 @@ + + + + + + - + - + @@ -60,7 +69,7 @@ - + @@ -82,7 +91,7 @@ - + @@ -113,7 +122,7 @@ - + @@ -154,7 +163,7 @@ - + @@ -211,11 +220,11 @@ - + - + @@ -224,11 +233,11 @@ - + - + @@ -266,7 +275,7 @@ - + @@ -312,13 +321,13 @@ - + - + @@ -327,11 +336,11 @@ - + - @@ -341,9 +350,9 @@ - + - + @@ -362,14 +371,14 @@ - + - + - - + @@ -413,9 +422,9 @@ - + - + @@ -424,12 +433,12 @@ - + - + @@ -532,6 +541,11 @@ + + + + + @@ -539,7 +553,7 @@ - + @@ -551,6 +565,12 @@ + + + + + + @@ -579,6 +599,10 @@ + + + + @@ -593,6 +617,10 @@ + + + + @@ -605,28 +633,7 @@ - @@ -643,6 +650,9 @@ + + + @@ -682,20 +692,7 @@ - + @@ -780,10 +777,6 @@ - @@ -800,10 +793,6 @@ - @@ -851,6 +840,8 @@ + + @@ -1187,19 +1178,18 @@ - - + - + - + diff --git a/public/R/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R similarity index 100% rename from public/R/queueJobReport.R rename to public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION similarity index 100% rename from public/R/src/gsalib/DESCRIPTION rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/DESCRIPTION diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.error.R similarity index 100% rename from public/R/src/gsalib/R/gsa.error.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.error.R diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.getargs.R similarity index 100% rename from public/R/src/gsalib/R/gsa.getargs.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.getargs.R diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.message.R similarity index 100% rename from public/R/src/gsalib/R/gsa.message.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.message.R diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.plot.venn.R similarity index 100% rename from public/R/src/gsalib/R/gsa.plot.venn.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.plot.venn.R diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.eval.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.eval.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.eval.R diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.gatkreport.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.gatkreport.R diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.squidmetrics.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.squidmetrics.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.squidmetrics.R diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.vcf.R similarity index 100% rename from public/R/src/gsalib/R/gsa.read.vcf.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.read.vcf.R diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.warn.R similarity index 100% rename from public/R/src/gsalib/R/gsa.warn.R rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.warn.R diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/Read-and-delete-me similarity index 100% rename from public/R/src/gsalib/Read-and-delete-me rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/Read-and-delete-me diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg similarity index 100% rename from public/R/src/gsalib/data/tearsheetdrop.jpg rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/data/tearsheetdrop.jpg diff --git a/public/R/src/gsalib/man/gsa.error.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.error.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.error.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.error.Rd diff --git a/public/R/src/gsalib/man/gsa.getargs.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.getargs.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.getargs.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.getargs.Rd diff --git a/public/R/src/gsalib/man/gsa.message.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.message.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.message.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.message.Rd diff --git a/public/R/src/gsalib/man/gsa.plot.venn.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.plot.venn.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.plot.venn.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.plot.venn.Rd diff --git a/public/R/src/gsalib/man/gsa.read.eval.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.eval.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.eval.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.eval.Rd diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.gatkreport.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.gatkreport.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.gatkreport.Rd diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.squidmetrics.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.squidmetrics.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.squidmetrics.Rd diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.vcf.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.read.vcf.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.read.vcf.Rd diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.warn.Rd similarity index 100% rename from public/R/src/gsalib/man/gsa.warn.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsa.warn.Rd diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd similarity index 100% rename from public/R/src/gsalib/man/gsalib-package.Rd rename to public/R/src/org/broadinstitute/sting/utils/R/gsalib/man/gsalib-package.Rd diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java index 58f7942fe..9180447b9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptExecutor.java @@ -25,35 +25,35 @@ package org.broadinstitute.sting.utils.R; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; -import org.broadinstitute.sting.utils.PathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.IOUtils; +import org.broadinstitute.sting.utils.io.Resource; +import org.broadinstitute.sting.utils.runtime.ProcessController; +import org.broadinstitute.sting.utils.runtime.ProcessSettings; import java.io.File; -import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** - * Generic service for executing RScripts in the GATK directory - * - * @author Your Name - * @since Date created + * Generic service for executing RScripts */ public class RScriptExecutor { /** * our log */ - protected static Logger logger = Logger.getLogger(RScriptExecutor.class); + private static Logger logger = Logger.getLogger(RScriptExecutor.class); public static class RScriptArgumentCollection { @Advanced - @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false) + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. Defaults Rscript meaning to use the first available on the environment PATH. For Broad users should 'use R-2.12' or later.", required = false) public String PATH_TO_RSCRIPT = "Rscript"; @Advanced @@ -62,40 +62,119 @@ public class RScriptExecutor { public RScriptArgumentCollection() {} - /** For testing and convenience */ + /* For testing and convenience */ public RScriptArgumentCollection(final String PATH_TO_RSCRIPT, final List PATH_TO_RESOURCES) { this.PATH_TO_RSCRIPT = PATH_TO_RSCRIPT; this.PATH_TO_RESOURCES = PATH_TO_RESOURCES; } } - final RScriptArgumentCollection myArgs; - final boolean exceptOnError; + private final RScriptArgumentCollection myArgs; + private final boolean exceptOnError; + private final List libraries = new ArrayList(); + private final List scriptResources = new ArrayList(); + private final List scriptFiles = new ArrayList(); + private final List args = new ArrayList(); public RScriptExecutor(final RScriptArgumentCollection myArgs, final boolean exceptOnError) { this.myArgs = myArgs; this.exceptOnError = exceptOnError; } - public void callRScripts(String scriptName, Object... scriptArgs) { - callRScripts(scriptName, Arrays.asList(scriptArgs)); + public void addLibrary(RScriptLibrary library) { + this.libraries.add(library); } - public void callRScripts(String scriptName, List scriptArgs) { + public void addScript(Resource script) { + this.scriptResources.add(script); + } + + public void addScript(File script) { + this.scriptFiles.add(script); + } + + /** + * Adds args to the end of the Rscript command line. + * @param args the args. + * @throws NullPointerException if any of the args are null. + */ + public void addArgs(Object... args) { + for (Object arg: args) + this.args.add(arg.toString()); + } + + public void exec() { + List tempFiles = new ArrayList(); try { - final File pathToScript = findScript(scriptName); - if ( pathToScript == null ) return; // we failed but shouldn't exception out - final String argString = Utils.join(" ", scriptArgs); - final String cmdLine = Utils.join(" ", Arrays.asList(myArgs.PATH_TO_RSCRIPT, pathToScript, argString)); - logger.info("Executing RScript: " + cmdLine); - Runtime.getRuntime().exec(cmdLine).waitFor(); - } catch (InterruptedException e) { + File tempLibDir = IOUtils.tempDir("R.", ".lib"); + tempFiles.add(tempLibDir); + + StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibDir).append("';"); + + if (this.libraries.size() > 0) { + List tempLibraryPaths = new ArrayList(); + for (RScriptLibrary library: this.libraries) { + File tempLibrary = library.writeTemp(); + tempFiles.add(tempLibrary); + tempLibraryPaths.add(tempLibrary.getAbsolutePath()); + } + + expression.append("install.packages("); + expression.append("pkgs=c('").append(StringUtils.join(tempLibraryPaths, "', '")).append("'), lib=tempLibDir, repos=NULL, type='source', "); + // Install faster by eliminating cruft. + expression.append("INSTALL_opts=c('--no-libs', '--no-data', '--no-help', '--no-demo', '--no-exec')"); + expression.append(");"); + + for (RScriptLibrary library: this.libraries) { + expression.append("require('").append(library.getLibraryName()).append("', lib.loc=tempLibDir);"); + } + } + + for (Resource script: this.scriptResources) { + File tempScript = IOUtils.writeTempResource(script); + tempFiles.add(tempScript); + expression.append("source('").append(tempScript.getAbsolutePath()).append("');"); + } + + for (File script: this.scriptFiles) { + expression.append("source('").append(script.getAbsolutePath()).append("');"); + } + + String[] cmd = new String[this.args.size() + 3]; + int i = 0; + cmd[i++] = myArgs.PATH_TO_RSCRIPT; + cmd[i++] = "-e"; + cmd[i++] = expression.toString(); + for (String arg: this.args) + cmd[i++] = arg; + + ProcessSettings processSettings = new ProcessSettings(cmd); + if (logger.isDebugEnabled()) { + processSettings.getStdoutSettings().printStandard(true); + processSettings.getStderrSettings().printStandard(true); + } + + ProcessController controller = ProcessController.getThreadLocal(); + + logger.debug("Executing: " + Utils.join(" ", cmd)); + logger.debug("Result: " + controller.exec(processSettings).getExitValue()); + + } catch (StingException e) { generateException(e); - } catch (IOException e) { - generateException("Fatal Exception: Perhaps RScript jobs are being spawned too quickly?", e); + } finally { + for (File temp: tempFiles) + FileUtils.deleteQuietly(temp); } } + public void callRScripts(String scriptName, Object... scriptArgs) { + final File pathToScript = findScript(scriptName); + if (pathToScript == null) return; // we failed but shouldn't exception out + addScript(pathToScript); + addArgs(scriptArgs); + exec(); + } + public File findScript(final String scriptName) { for ( String pathToResource : myArgs.PATH_TO_RESOURCES ) { final File f = new File(pathToResource + "/" + scriptName); diff --git a/public/java/src/org/broadinstitute/sting/utils/R/RScriptLibrary.java b/public/java/src/org/broadinstitute/sting/utils/R/RScriptLibrary.java new file mode 100644 index 000000000..60cd7504b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/R/RScriptLibrary.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.R; + +import org.broadinstitute.sting.utils.io.IOUtils; +import org.broadinstitute.sting.utils.io.Resource; + +import java.io.File; + +/** + * Libraries embedded in the StingUtils package. + */ +public enum RScriptLibrary { + GSALIB("gsalib"); + + private final String name; + + private RScriptLibrary(String name) { + this.name = name; + } + + public String getLibraryName() { + return this.name; + } + + public String getResourcePath() { + return name + ".tar.gz"; + } + + /** + * Writes the library source code to a temporary tar.gz file and returns the path. + * @return The path to the library source code. The caller must delete the code when done. + */ + public File writeTemp() { + return IOUtils.writeTempResource(new Resource(getResourcePath(), RScriptLibrary.class)); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/io/FileExtension.java b/public/java/src/org/broadinstitute/sting/utils/io/FileExtension.java new file mode 100644 index 000000000..cd69ee126 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/FileExtension.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.io; + +import java.io.File; + +public interface FileExtension { + /** + * Returns a clone of the FileExtension with a new path. + * @param path New path. + * @return New FileExtension + */ + public File withPath(String path); +} diff --git a/public/java/src/org/broadinstitute/sting/utils/io/HardThresholdingOutputStream.java b/public/java/src/org/broadinstitute/sting/utils/io/HardThresholdingOutputStream.java new file mode 100755 index 000000000..26b5ae6fd --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/HardThresholdingOutputStream.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.utils.io; + +import org.apache.commons.io.output.ThresholdingOutputStream; + +import java.io.IOException; + +/** + * An output stream which stops at the threshold + * instead of potentially triggering early. + */ +public abstract class HardThresholdingOutputStream extends ThresholdingOutputStream { + protected HardThresholdingOutputStream(int threshold) { + super(threshold); + } + + @Override + public void write(byte[] b) throws IOException { + write(b, 0, b.length); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + int remaining = this.getThreshold() - (int)this.getByteCount(); + if (!isThresholdExceeded() && len > remaining) { + super.write(b, off, remaining); + super.write(b, off + remaining, len - remaining); + } else { + super.write(b, off, len); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java new file mode 100644 index 000000000..7bfaa0194 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/IOUtils.java @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.io; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.io.LineIterator; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.*; +import java.util.*; + +public class IOUtils { + private static Logger logger = Logger.getLogger(IOUtils.class); + + /** + * Checks if the temp directory has been setup and throws an exception if they user hasn't set it correctly. + * + * @param tempDir Temporary directory. + */ + public static void checkTempDir(File tempDir) { + String tempDirPath = tempDir.getAbsolutePath(); + // Keeps the user from leaving the temp directory as the default, and on Macs from having pluses + // in the path which can cause problems with the Google Reflections library. + // see also: http://benjchristensen.com/2009/09/22/mac-osx-10-6-java-java-io-tmpdir/ + if (tempDirPath.startsWith("/var/folders/") || (tempDirPath.equals("/tmp")) || (tempDirPath.equals("/tmp/"))) + throw new UserException.BadTmpDir("java.io.tmpdir must be explicitly set"); + if (!tempDir.exists() && !tempDir.mkdirs()) + throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath()); + } + + /** + * Creates a temp directory with the prefix and optional suffix. + * + * @param prefix Prefix for the directory name. + * @param suffix Optional suffix for the directory name. + * @return The created temporary directory. + */ + public static File tempDir(String prefix, String suffix) { + return tempDir(prefix, suffix, null); + } + + /** + * Creates a temp directory with the prefix and optional suffix. + * + * @param prefix Prefix for the directory name. + * @param suffix Optional suffix for the directory name. + * @param tempDirParent Parent directory for the temp directory. + * @return The created temporary directory. + */ + public static File tempDir(String prefix, String suffix, File tempDirParent) { + try { + if (tempDirParent == null) + tempDirParent = FileUtils.getTempDirectory(); + if (!tempDirParent.exists() && !tempDirParent.mkdirs()) + throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent); + File temp = File.createTempFile(prefix + "-", suffix, tempDirParent); + if (!temp.delete()) + throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath()); + if (!temp.mkdir()) + throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath()); + return absolute(temp); + } catch (IOException e) { + throw new UserException.BadTmpDir(e.getMessage()); + } + } + + /** + * Writes content to a temp file and returns the path to the temporary file. + * + * @param content to write. + * @param prefix Prefix for the temp file. + * @param suffix Suffix for the temp file. + * @param directory Directory for the temp file. + * @return the path to the temp file. + */ + public static File writeTempFile(String content, String prefix, String suffix, File directory) { + try { + File tempFile = absolute(File.createTempFile(prefix, suffix, directory)); + FileUtils.writeStringToFile(tempFile, content); + return tempFile; + } catch (IOException e) { + throw new UserException.BadTmpDir(e.getMessage()); + } + } + + /** + * Waits for NFS to propagate a file creation, imposing a timeout. + * + * Based on Apache Commons IO FileUtils.waitFor() + * + * @param file The file to wait for. + * @param seconds The maximum time in seconds to wait. + * @return true if the file exists + */ + public static boolean waitFor(File file, int seconds) { + return waitFor(Collections.singletonList(file), seconds).isEmpty(); + } + + /** + * Waits for NFS to propagate a file creation, imposing a timeout. + * + * Based on Apache Commons IO FileUtils.waitFor() + * + * @param files The list of files to wait for. + * @param seconds The maximum time in seconds to wait. + * @return Files that still do not exists at the end of the timeout, or a empty list if all files exists. + */ + public static List waitFor(Collection files, int seconds) { + long timeout = 0; + long tick = 0; + List missingFiles = new ArrayList(); + for (File file : files) + if (!file.exists()) + missingFiles.add(file); + + while (!missingFiles.isEmpty() && timeout <= seconds) { + if (tick >= 10) { + tick = 0; + timeout++; + } + tick++; + try { + Thread.sleep(100); + } catch (InterruptedException ignore) { + } + List newMissingFiles = new ArrayList(); + for (File file : missingFiles) + if (!file.exists()) + newMissingFiles.add(file); + missingFiles = newMissingFiles; + } + return missingFiles; + } + + /** + * Returns the directory at the number of levels deep. + * For example 2 levels of /path/to/dir will return /path/to + * + * @param dir Directory path. + * @param level how many levels deep from the root. + * @return The path to the parent directory that is level-levels deep. + */ + public static File dirLevel(File dir, int level) { + List directories = new ArrayList(); + File parentDir = absolute(dir); + while (parentDir != null) { + directories.add(0, parentDir); + parentDir = parentDir.getParentFile(); + } + if (directories.size() <= level) + return directories.get(directories.size() - 1); + else + return directories.get(level); + } + + /** + * Returns the sub path rooted at the parent. + * + * @param parent The parent directory. + * @param path The sub path to append to the parent, if the path is not absolute. + * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path. + */ + public static File absolute(File parent, String path) { + return absolute(parent, new File(path)); + } + + /** + * Returns the sub path rooted at the parent. + * + * @param parent The parent directory. + * @param file The sub path to append to the parent, if the path is not absolute. + * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path. + */ + public static File absolute(File parent, File file) { + String newPath; + if (file.isAbsolute()) + newPath = absolutePath(file); + else + newPath = absolutePath(new File(parent, file.getPath())); + return replacePath(file, newPath); + } + + /** + * A mix of getCanonicalFile and getAbsoluteFile that returns the + * absolute path to the file without deferencing symbolic links. + * + * @param file the file. + * @return the absolute path to the file. + */ + public static File absolute(File file) { + return replacePath(file, absolutePath(file)); + } + + private static String absolutePath(File file) { + File fileAbs = file.getAbsoluteFile(); + LinkedList names = new LinkedList(); + while (fileAbs != null) { + String name = fileAbs.getName(); + fileAbs = fileAbs.getParentFile(); + + if (".".equals(name)) { + /* skip */ + + /* TODO: What do we do for ".."? + } else if (name == "..") { + + CentOS tcsh says use getCanonicalFile: + ~ $ mkdir -p test1/test2 + ~ $ ln -s test1/test2 test3 + ~ $ cd test3/.. + ~/test1 $ + + Mac bash says keep going with getAbsoluteFile: + ~ $ mkdir -p test1/test2 + ~ $ ln -s test1/test2 test3 + ~ $ cd test3/.. + ~ $ + + For now, leave it and let the shell figure it out. + */ + } else { + names.add(0, name); + } + } + + return ("/" + StringUtils.join(names, "/")); + } + + private static File replacePath(File file, String path) { + if (file instanceof FileExtension) + return ((FileExtension)file).withPath(path); + if (!File.class.equals(file.getClass())) + throw new StingException("Sub classes of java.io.File must also implement FileExtension"); + return new File(path); + } + + /** + * Returns the last lines of the file. + * NOTE: This is only safe to run on smaller files! + * + * @param file File to read. + * @param count Maximum number of lines to return. + * @return The last count lines from file. + * @throws IOException When unable to read the file. + */ + public static List tail(File file, int count) throws IOException { + LinkedList tailLines = new LinkedList(); + FileReader reader = new FileReader(file); + try { + LineIterator iterator = org.apache.commons.io.IOUtils.lineIterator(reader); + int lineCount = 0; + while (iterator.hasNext()) { + String line = iterator.nextLine(); + lineCount++; + if (lineCount > count) + tailLines.removeFirst(); + tailLines.offer(line); + } + } finally { + org.apache.commons.io.IOUtils.closeQuietly(reader); + } + return tailLines; + } + + /** + * Tries to delete a file. Emits a warning if the file was unable to be deleted. + * + * @param file File to delete. + * @return true if the file was deleted. + */ + public static boolean tryDelete(File file) { + boolean deleted = FileUtils.deleteQuietly(file); + if (deleted) + logger.debug("Deleted " + file); + else if (file.exists()) + logger.warn("Unable to delete " + file); + return deleted; + } + + /** + * Writes the an embedded resource to a temp file. + * File is not scheduled for deletion and must be cleaned up by the caller. + * @param resource Embedded resource. + * @return Path to the temp file with the contents of the resource. + */ + public static File writeTempResource(Resource resource) { + File temp; + try { + temp = File.createTempFile(FilenameUtils.getBaseName(resource.getPath()) + ".", "." + FilenameUtils.getExtension(resource.getPath())); + } catch (IOException e) { + throw new UserException.BadTmpDir(e.getMessage()); + } + writeResource(resource, temp); + return temp; + } + + /** + * Writes the an embedded resource to a file. + * File is not scheduled for deletion and must be cleaned up by the caller. + * @param resource Embedded resource. + * @param file File path to write. + */ + public static void writeResource(Resource resource, File file) { + String path = resource.getPath(); + Class clazz = resource.getRelativeClass(); + InputStream inputStream = null; + OutputStream outputStream = null; + try { + if (clazz == null) { + inputStream = ClassLoader.getSystemResourceAsStream(path); + if (inputStream == null) + throw new IllegalArgumentException("Resource not found: " + path); + } else { + inputStream = clazz.getResourceAsStream(path); + if (inputStream == null) + throw new IllegalArgumentException("Resource not found relative to " + clazz + ": " + path); + } + outputStream = FileUtils.openOutputStream(file); + org.apache.commons.io.IOUtils.copy(inputStream, outputStream); + } catch (IOException e) { + throw new StingException(String.format("Unable to copy resource '%s' to '%s'", path, file), e); + } finally { + org.apache.commons.io.IOUtils.closeQuietly(inputStream); + org.apache.commons.io.IOUtils.closeQuietly(outputStream); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/io/Resource.java b/public/java/src/org/broadinstitute/sting/utils/io/Resource.java new file mode 100644 index 000000000..5473511b4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/io/Resource.java @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.io; + +/** + * Stores a resource by path and a relative class. + */ +public class Resource { + private final String path; + private final Class relativeClass; + + /** + * Create a resource with a path and a relative class. + * @param path Relative or absolute path to the class. + * @param relativeClass Relative class to use as a class loader and for a relative package. + * + * If the relative class is null then the system classloader will be used and the path must be absolute. + */ + public Resource(String path, Class relativeClass) { + this.path = path; + this.relativeClass = relativeClass; + } + + public Class getRelativeClass() { + return relativeClass; + } + + public String getPath() { + return path; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/CapturedStreamOutput.java b/public/java/src/org/broadinstitute/sting/utils/runtime/CapturedStreamOutput.java new file mode 100755 index 000000000..50622cef1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/CapturedStreamOutput.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.NullOutputStream; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.HardThresholdingOutputStream; + +import java.io.*; +import java.util.EnumMap; + +/** + * Stream output captured from a stream. + */ +public class CapturedStreamOutput extends StreamOutput { + private final InputStream processStream; + private final EnumMap outputStreams = new EnumMap(StreamLocation.class); + + /** + * The byte stream to capture content or null if no output string content was requested. + */ + private final ByteArrayOutputStream bufferStream; + + /** + * True if the buffer is truncated. + */ + private boolean bufferTruncated = false; + + /** + * @param settings Settings that define what to capture. + * @param processStream Stream to capture output. + * @param standardStream Stream to write debug output. + */ + public CapturedStreamOutput(OutputStreamSettings settings, InputStream processStream, PrintStream standardStream) { + this.processStream = processStream; + int bufferSize = settings.getBufferSize(); + this.bufferStream = (bufferSize < 0) ? new ByteArrayOutputStream() : new ByteArrayOutputStream(bufferSize); + + for (StreamLocation location : settings.getStreamLocations()) { + OutputStream outputStream; + switch (location) { + case Buffer: + if (bufferSize < 0) { + outputStream = this.bufferStream; + } else { + outputStream = new HardThresholdingOutputStream(bufferSize) { + @Override + protected OutputStream getStream() throws IOException { + return bufferTruncated ? NullOutputStream.NULL_OUTPUT_STREAM : bufferStream; + } + + @Override + protected void thresholdReached() throws IOException { + bufferTruncated = true; + } + }; + } + break; + case File: + try { + outputStream = new FileOutputStream(settings.getOutputFile(), settings.isAppendFile()); + } catch (IOException e) { + throw new UserException.BadInput(e.getMessage()); + } + break; + case Standard: + outputStream = standardStream; + break; + default: + throw new ReviewedStingException("Unexpected stream location: " + location); + } + this.outputStreams.put(location, outputStream); + } + } + + @Override + public byte[] getBufferBytes() { + return bufferStream.toByteArray(); + } + + @Override + public boolean isBufferTruncated() { + return bufferTruncated; + } + + /** + * Drain the input stream to keep the process from backing up until it's empty. + * File streams will be closed automatically when this method returns. + * + * @throws java.io.IOException When unable to read or write. + */ + public void readAndClose() throws IOException { + try { + byte[] buf = new byte[4096]; + int readCount; + while ((readCount = processStream.read(buf)) >= 0) + for (OutputStream outputStream : this.outputStreams.values()) { + outputStream.write(buf, 0, readCount); + } + } finally { + for (StreamLocation location : this.outputStreams.keySet()) { + OutputStream outputStream = this.outputStreams.get(location); + outputStream.flush(); + if (location != StreamLocation.Standard) + IOUtils.closeQuietly(outputStream); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/InputStreamSettings.java b/public/java/src/org/broadinstitute/sting/utils/runtime/InputStreamSettings.java new file mode 100755 index 000000000..dfa380a68 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/InputStreamSettings.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import java.io.File; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Set; + +/** + * Settings that define text to write to the process stdin. + */ +public class InputStreamSettings { + private final EnumSet streamLocations = EnumSet.noneOf(StreamLocation.class); + private byte[] inputBuffer; + private File inputFile; + + public InputStreamSettings() { + } + + /** + * @param inputBuffer String to write to stdin. + */ + public InputStreamSettings(String inputBuffer) { + setInputBuffer(inputBuffer); + } + + /** + * @param inputFile File to write to stdin. + */ + public InputStreamSettings(File inputFile) { + setInputFile(inputFile); + } + + /** + * @param inputBuffer String to write to stdin. + * @param inputFile File to write to stdin. + */ + public InputStreamSettings(byte[] inputBuffer, File inputFile) { + setInputBuffer(inputBuffer); + setInputFile(inputFile); + } + + public Set getStreamLocations() { + return Collections.unmodifiableSet(streamLocations); + } + + public byte[] getInputBuffer() { + return inputBuffer; + } + + public void setInputBuffer(String inputBuffer) { + if (inputBuffer == null) + throw new IllegalArgumentException("inputBuffer cannot be null"); + this.streamLocations.add(StreamLocation.Buffer); + this.inputBuffer = inputBuffer.getBytes(); + } + + public void setInputBuffer(byte[] inputBuffer) { + if (inputBuffer == null) + throw new IllegalArgumentException("inputBuffer cannot be null"); + this.streamLocations.add(StreamLocation.Buffer); + this.inputBuffer = inputBuffer; + } + + public void clearInputBuffer() { + this.streamLocations.remove(StreamLocation.Buffer); + this.inputBuffer = null; + } + + public File getInputFile() { + return inputFile; + } + + public void setInputFile(File inputFile) { + if (inputFile == null) + throw new IllegalArgumentException("inputFile cannot be null"); + this.streamLocations.add(StreamLocation.File); + this.inputFile = inputFile; + } + + public void clearInputFile() { + this.streamLocations.remove(StreamLocation.File); + this.inputFile = null; + } + + public void setInputStandard(boolean inputStandard) { + if (inputStandard) + this.streamLocations.add(StreamLocation.Standard); + else + this.streamLocations.remove(StreamLocation.Standard); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/OutputStreamSettings.java b/public/java/src/org/broadinstitute/sting/utils/runtime/OutputStreamSettings.java new file mode 100755 index 000000000..468ece178 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/OutputStreamSettings.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import java.io.File; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Set; + +/** + * Settings that define text to capture from a process stream. + */ +public class OutputStreamSettings { + private final EnumSet streamLocations = EnumSet.noneOf(StreamLocation.class); + private int bufferSize; + private File outputFile; + private boolean appendFile; + + public OutputStreamSettings() { + } + + /** + * @param bufferSize The number of bytes to capture, or -1 for unlimited. + */ + public OutputStreamSettings(int bufferSize) { + setBufferSize(bufferSize); + } + + /** + * @param outputFile The file to write output to. + */ + public OutputStreamSettings(File outputFile) { + setOutputFile(outputFile); + } + + /** + * @param outputFile The file to write output to. + * @param append true if the output file should be appended to. + */ + public OutputStreamSettings(File outputFile, boolean append) { + setOutputFile(outputFile, append); + } + + public OutputStreamSettings(int bufferSize, File outputFile, boolean appendFile) { + setBufferSize(bufferSize); + setOutputFile(outputFile, appendFile); + } + + public Set getStreamLocations() { + return Collections.unmodifiableSet(streamLocations); + } + + public int getBufferSize() { + return bufferSize; + } + + public void setBufferSize(int bufferSize) { + this.streamLocations.add(StreamLocation.Buffer); + this.bufferSize = bufferSize; + } + + public void clearBufferSize() { + this.streamLocations.remove(StreamLocation.Buffer); + this.bufferSize = 0; + } + + public File getOutputFile() { + return outputFile; + } + + public boolean isAppendFile() { + return appendFile; + } + + /** + * Overwrites the outputFile with the process output. + * + * @param outputFile File to overwrite. + */ + public void setOutputFile(File outputFile) { + setOutputFile(outputFile, false); + } + + public void setOutputFile(File outputFile, boolean append) { + if (outputFile == null) + throw new IllegalArgumentException("outputFile cannot be null"); + streamLocations.add(StreamLocation.File); + this.outputFile = outputFile; + this.appendFile = append; + } + + public void clearOutputFile() { + streamLocations.remove(StreamLocation.File); + this.outputFile = null; + this.appendFile = false; + } + + public void printStandard(boolean print) { + if (print) + this.streamLocations.add(StreamLocation.Standard); + else + this.streamLocations.remove(StreamLocation.Standard); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessController.java b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessController.java new file mode 100755 index 000000000..6a3f9c753 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessController.java @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.*; + +/** + * Facade to Runtime.exec() and java.lang.Process. Handles + * running a process to completion and returns stdout and stderr + * as strings. Creates separate threads for reading stdout and stderr, + * then reuses those threads for each process most efficient use is + * to create one of these and use it repeatedly. Instances are not + * thread-safe, however. + * + * TODO: java.io sometimes zombies the backround threads locking up on read(). + * Supposedly NIO has better ways of interrupting a blocked stream but will + * require a little bit of refactoring. + * + * @author Michael Koehrsen + * @author Khalid Shakir + */ +public class ProcessController { + private static Logger logger = Logger.getLogger(ProcessController.class); + + private static enum ProcessStream {Stdout, Stderr} + + // Tracks running processes. + private static final Set running = Collections.synchronizedSet(new HashSet()); + + // Tracks this running process. + private Process process; + + // Threads that capture stdout and stderr + private final OutputCapture stdoutCapture; + private final OutputCapture stderrCapture; + + // When a caller destroyes a controller a new thread local version will be created + private boolean destroyed = false; + + // Communication channels with output capture threads + + // Holds the stdout and stderr sent to the background capture threads + private final Map toCapture = + new EnumMap(ProcessStream.class); + + // Holds the results of the capture from the background capture threads. + // May be the content via toCapture or an StreamOutput.EMPTY if the capture was interrupted. + private final Map fromCapture = + new EnumMap(ProcessStream.class); + + // Useful for debugging if background threads have shut down correctly + private static int nextControllerId = 0; + private final int controllerId; + + public ProcessController() { + // Start the background threads for this controller. + synchronized (running) { + controllerId = nextControllerId++; + } + stdoutCapture = new OutputCapture(ProcessStream.Stdout, controllerId); + stderrCapture = new OutputCapture(ProcessStream.Stderr, controllerId); + stdoutCapture.start(); + stderrCapture.start(); + } + + /** + * Returns a thread local ProcessController. + * Should NOT be closed when finished so it can be reused by the thread. + * + * @return a thread local ProcessController. + */ + public static ProcessController getThreadLocal() { + // If the local controller was destroyed get a fresh instance. + if (threadProcessController.get().destroyed) + threadProcessController.remove(); + return threadProcessController.get(); + } + + /** + * Thread local process controller container. + */ + private static final ThreadLocal threadProcessController = + new ThreadLocal() { + @Override + protected ProcessController initialValue() { + return new ProcessController(); + } + }; + + /** + * Similar to Runtime.exec() but drains the output and error streams. + * + * @param command Command to run. + * @return The result code. + */ + public static int exec(String[] command) { + ProcessController controller = ProcessController.getThreadLocal(); + return controller.exec(new ProcessSettings(command)).getExitValue(); + } + + /** + * Executes a command line program with the settings and waits for it to return, + * processing the output on a background thread. + * + * @param settings Settings to be run. + * @return The output of the command. + */ + public ProcessOutput exec(ProcessSettings settings) { + if (destroyed) + throw new IllegalStateException("This controller was destroyed"); + + ProcessBuilder builder = new ProcessBuilder(settings.getCommand()); + builder.directory(settings.getDirectory()); + + Map settingsEnvironment = settings.getEnvironment(); + if (settingsEnvironment != null) { + Map builderEnvironment = builder.environment(); + builderEnvironment.clear(); + builderEnvironment.putAll(settingsEnvironment); + } + + builder.redirectErrorStream(settings.isRedirectErrorStream()); + + StreamOutput stdout = null; + StreamOutput stderr = null; + + // Start the process running. + + try { + synchronized (toCapture) { + process = builder.start(); + } + running.add(this); + } catch (IOException e) { + throw new ReviewedStingException("Unable to start command: " + StringUtils.join(builder.command(), " ")); + } + + int exitCode; + + try { + // Notify the background threads to start capturing. + synchronized (toCapture) { + toCapture.put(ProcessStream.Stdout, + new CapturedStreamOutput(settings.getStdoutSettings(), process.getInputStream(), System.out)); + toCapture.put(ProcessStream.Stderr, + new CapturedStreamOutput(settings.getStderrSettings(), process.getErrorStream(), System.err)); + toCapture.notifyAll(); + } + + // Write stdin content + InputStreamSettings stdinSettings = settings.getStdinSettings(); + Set streamLocations = stdinSettings.getStreamLocations(); + if (!streamLocations.isEmpty()) { + try { + OutputStream stdinStream = process.getOutputStream(); + for (StreamLocation location : streamLocations) { + InputStream inputStream; + switch (location) { + case Buffer: + inputStream = new ByteArrayInputStream(stdinSettings.getInputBuffer()); + break; + case File: + try { + inputStream = FileUtils.openInputStream(stdinSettings.getInputFile()); + } catch (IOException e) { + throw new UserException.BadInput(e.getMessage()); + } + break; + case Standard: + inputStream = System.in; + break; + default: + throw new ReviewedStingException("Unexpected stream location: " + location); + } + try { + IOUtils.copy(inputStream, stdinStream); + } finally { + if (location != StreamLocation.Standard) + IOUtils.closeQuietly(inputStream); + } + } + stdinStream.flush(); + } catch (IOException e) { + throw new ReviewedStingException("Error writing to stdin on command: " + StringUtils.join(builder.command(), " "), e); + } + } + + // Wait for the process to complete. + try { + process.getOutputStream().close(); + process.waitFor(); + } catch (IOException e) { + throw new ReviewedStingException("Unable to close stdin on command: " + StringUtils.join(builder.command(), " "), e); + } catch (InterruptedException e) { + throw new ReviewedStingException("Process interrupted", e); + } finally { + while (!destroyed && stdout == null || stderr == null) { + synchronized (fromCapture) { + if (fromCapture.containsKey(ProcessStream.Stdout)) + stdout = fromCapture.remove(ProcessStream.Stdout); + if (fromCapture.containsKey(ProcessStream.Stderr)) + stderr = fromCapture.remove(ProcessStream.Stderr); + try { + if (stdout == null || stderr == null) + fromCapture.wait(); + } catch (InterruptedException e) { + // Log the error, ignore the interrupt and wait patiently + // for the OutputCaptures to (via finally) return their + // stdout and stderr. + logger.error(e); + } + } + } + + if (destroyed) { + if (stdout == null) + stdout = StreamOutput.EMPTY; + if (stderr == null) + stderr = StreamOutput.EMPTY; + } + } + } finally { + synchronized (toCapture) { + exitCode = process.exitValue(); + process = null; + } + running.remove(this); + } + + return new ProcessOutput(exitCode, stdout, stderr); + } + + /** + * @return The set of still running processes. + */ + public static Set getRunning() { + synchronized (running) { + return new HashSet(running); + } + } + + /** + * Stops the process from running and tries to ensure process is cleaned up properly. + * NOTE: sub-processes started by process may be zombied with their parents set to pid 1. + * NOTE: capture threads may block on read. + * TODO: Try to use NIO to interrupt streams. + */ + public void tryDestroy() { + destroyed = true; + synchronized (toCapture) { + if (process != null) { + process.destroy(); + IOUtils.closeQuietly(process.getInputStream()); + IOUtils.closeQuietly(process.getErrorStream()); + } + stdoutCapture.interrupt(); + stderrCapture.interrupt(); + toCapture.notifyAll(); + } + } + + @Override + protected void finalize() throws Throwable { + try { + tryDestroy(); + } catch (Exception e) { + logger.error(e); + } + super.finalize(); + } + + private class OutputCapture extends Thread { + private final int controllerId; + private final ProcessStream key; + + /** + * Reads in the output of a stream on a background thread to keep the output pipe from backing up and freezing the called process. + * + * @param key The stdout or stderr key for this output capture. + * @param controllerId Unique id of the controller. + */ + public OutputCapture(ProcessStream key, int controllerId) { + super(String.format("OutputCapture-%d-%s-%s-%d", controllerId, key.name().toLowerCase(), + Thread.currentThread().getName(), Thread.currentThread().getId())); + this.controllerId = controllerId; + this.key = key; + setDaemon(true); + } + + /** + * Runs the capture. + */ + @Override + public void run() { + while (!destroyed) { + StreamOutput processStream = StreamOutput.EMPTY; + try { + // Wait for a new input stream to be passed from this process controller. + CapturedStreamOutput capturedProcessStream = null; + while (!destroyed && capturedProcessStream == null) { + synchronized (toCapture) { + if (toCapture.containsKey(key)) { + capturedProcessStream = toCapture.remove(key); + } else { + toCapture.wait(); + } + } + } + + if (!destroyed) { + // Read in the input stream + processStream = capturedProcessStream; + capturedProcessStream.readAndClose(); + } + } catch (InterruptedException e) { + logger.info("OutputCapture interrupted, exiting"); + break; + } catch (IOException e) { + logger.error("Error reading process output", e); + } finally { + // Send the string back to the process controller. + synchronized (fromCapture) { + fromCapture.put(key, processStream); + fromCapture.notify(); + } + } + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessOutput.java b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessOutput.java new file mode 100755 index 000000000..211008950 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessOutput.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +public class ProcessOutput { + private final int exitValue; + private final StreamOutput stdout; + private final StreamOutput stderr; + + /** + * The output of a process. + * + * @param exitValue The exit value. + * @param stdout The capture of stdout as defined by the stdout OutputStreamSettings. + * @param stderr The capture of stderr as defined by the stderr OutputStreamSettings. + */ + public ProcessOutput(int exitValue, StreamOutput stdout, StreamOutput stderr) { + this.exitValue = exitValue; + this.stdout = stdout; + this.stderr = stderr; + } + + public int getExitValue() { + return exitValue; + } + + public StreamOutput getStdout() { + return stdout; + } + + public StreamOutput getStderr() { + return stderr; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java new file mode 100755 index 000000000..b9f67f3a4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/ProcessSettings.java @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import com.sun.corba.se.spi.orbutil.fsm.Input; + +import java.io.File; +import java.util.Map; + +public class ProcessSettings { + private String[] command; + private Map environment; + private File directory; + private boolean redirectErrorStream; + private InputStreamSettings stdinSettings; + private OutputStreamSettings stdoutSettings; + private OutputStreamSettings stderrSettings; + + /** + * @param command Command line to run. + */ + public ProcessSettings(String[] command) { + this(command, false, null, null, null, null, null); + } + + /** + * @param command Command line to run. + * @param redirectErrorStream true if stderr should be sent to stdout. + * @param environment Environment settings to override System.getEnv, or null to use System.getEnv. + * @param directory The directory to run the command in, or null to run in the current directory. + * @param stdinSettings Settings for writing to the process stdin. + * @param stdoutSettings Settings for capturing the process stdout. + * @param stderrSettings Setting for capturing the process stderr. + */ + public ProcessSettings(String[] command, boolean redirectErrorStream, File directory, Map environment, + InputStreamSettings stdinSettings, OutputStreamSettings stdoutSettings, OutputStreamSettings stderrSettings) { + this.command = checkCommand(command); + this.redirectErrorStream = redirectErrorStream; + this.directory = directory; + this.environment = environment; + this.stdinSettings = checkSettings(stdinSettings); + this.stdoutSettings = checkSettings(stdoutSettings); + this.stderrSettings = checkSettings(stderrSettings); + } + + public String[] getCommand() { + return command; + } + + public void setCommand(String[] command) { + this.command = checkCommand(command); + } + + public boolean isRedirectErrorStream() { + return redirectErrorStream; + } + + public void setRedirectErrorStream(boolean redirectErrorStream) { + this.redirectErrorStream = redirectErrorStream; + } + + public File getDirectory() { + return directory; + } + + public void setDirectory(File directory) { + this.directory = directory; + } + + public Map getEnvironment() { + return environment; + } + + public void setEnvironment(Map environment) { + this.environment = environment; + } + + public InputStreamSettings getStdinSettings() { + return stdinSettings; + } + + public void setStdinSettings(InputStreamSettings stdinSettings) { + this.stdinSettings = checkSettings(stdinSettings); + } + + public OutputStreamSettings getStdoutSettings() { + return stdoutSettings; + } + + public void setStdoutSettings(OutputStreamSettings stdoutSettings) { + this.stdoutSettings = checkSettings(stdoutSettings); + } + + public OutputStreamSettings getStderrSettings() { + return stderrSettings; + } + + public void setStderrSettings(OutputStreamSettings stderrSettings) { + this.stderrSettings = checkSettings(stderrSettings); + } + + protected String[] checkCommand(String[] command) { + if (command == null) + throw new IllegalArgumentException("Command is not allowed to be null"); + for (String s: command) + if (s == null) + throw new IllegalArgumentException("Command is not allowed to contain nulls"); + return command; + } + + protected InputStreamSettings checkSettings(InputStreamSettings settings) { + return settings == null ? new InputStreamSettings() : settings; + } + + protected OutputStreamSettings checkSettings(OutputStreamSettings settings) { + return settings == null ? new OutputStreamSettings() : settings; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/StreamLocation.java b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamLocation.java new file mode 100755 index 000000000..df72180f1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamLocation.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +/** + * Where to read/write a stream + */ +public enum StreamLocation { + Buffer, File, Standard +} diff --git a/public/java/src/org/broadinstitute/sting/utils/runtime/StreamOutput.java b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamOutput.java new file mode 100755 index 000000000..5dc94815f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/runtime/StreamOutput.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +/** + * The content of stdout or stderr. + */ +public abstract class StreamOutput { + /** + * Empty stream output when no output is captured due to an error. + */ + public static final StreamOutput EMPTY = new StreamOutput() { + @Override + public byte[] getBufferBytes() { + return new byte[0]; + } + + @Override + public boolean isBufferTruncated() { + return false; + } + }; + + /** + * Returns the content as a string. + * + * @return The content as a string. + */ + public String getBufferString() { + return new String(getBufferBytes()); + } + + /** + * Returns the content as a string. + * + * @return The content as a string. + */ + public abstract byte[] getBufferBytes(); + + /** + * Returns true if the buffer was truncated. + * + * @return true if the buffer was truncated. + */ + public abstract boolean isBufferTruncated(); +} diff --git a/public/java/test/org/broadinstitute/sting/utils/R/RScriptLibraryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/R/RScriptLibraryUnitTest.java new file mode 100644 index 000000000..19fd5b316 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/R/RScriptLibraryUnitTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.R; + +import org.apache.commons.io.FileUtils; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class RScriptLibraryUnitTest { + @Test + public void testProperties() { + Assert.assertEquals(RScriptLibrary.GSALIB.getLibraryName(), "gsalib"); + Assert.assertEquals(RScriptLibrary.GSALIB.getResourcePath(), "gsalib.tar.gz"); + } + + @Test + public void testWriteTemp() { + File file = RScriptLibrary.GSALIB.writeTemp(); + Assert.assertTrue(file.exists(), "R library was not written to temp file: " + file); + FileUtils.deleteQuietly(file); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java new file mode 100644 index 000000000..4caf7f485 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/io/IOUtilsUnitTest.java @@ -0,0 +1,197 @@ +package org.broadinstitute.sting.utils.io; + +import org.apache.commons.io.FileUtils; +import org.broadinstitute.sting.BaseTest; +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class IOUtilsUnitTest extends BaseTest { + @Test + public void testGoodTempDir() { + IOUtils.checkTempDir(new File("/tmp/queue")); + } + + @Test(expectedExceptions=UserException.BadTmpDir.class) + public void testBadTempDir() { + IOUtils.checkTempDir(new File("/tmp")); + } + + @Test + public void testAbsoluteSubDir() { + File subDir = IOUtils.absolute(new File("."), new File("/path/to/file")); + Assert.assertEquals(subDir, new File("/path/to/file")); + + subDir = IOUtils.absolute(new File("/different/path"), new File("/path/to/file")); + Assert.assertEquals(subDir, new File("/path/to/file")); + + subDir = IOUtils.absolute(new File("/different/path"), new File(".")); + Assert.assertEquals(subDir, new File("/different/path")); + } + + @Test + public void testRelativeSubDir() throws IOException { + File subDir = IOUtils.absolute(new File("."), new File("path/to/file")); + Assert.assertEquals(subDir.getCanonicalFile(), new File("path/to/file").getCanonicalFile()); + + subDir = IOUtils.absolute(new File("/different/path"), new File("path/to/file")); + Assert.assertEquals(subDir, new File("/different/path/path/to/file")); + } + + @Test + public void testDottedSubDir() throws IOException { + File subDir = IOUtils.absolute(new File("."), new File("path/../to/file")); + Assert.assertEquals(subDir.getCanonicalFile(), new File("path/../to/./file").getCanonicalFile()); + + subDir = IOUtils.absolute(new File("."), new File("/path/../to/file")); + Assert.assertEquals(subDir, new File("/path/../to/file")); + + subDir = IOUtils.absolute(new File("/different/../path"), new File("path/to/file")); + Assert.assertEquals(subDir, new File("/different/../path/path/to/file")); + + subDir = IOUtils.absolute(new File("/different/./path"), new File("/path/../to/file")); + Assert.assertEquals(subDir, new File("/path/../to/file")); + } + + @Test + public void testTempDir() { + File tempDir = IOUtils.tempDir("Q-Unit-Test", "", new File("queueTempDirToDelete")); + Assert.assertTrue(tempDir.exists()); + Assert.assertFalse(tempDir.isFile()); + Assert.assertTrue(tempDir.isDirectory()); + boolean deleted = IOUtils.tryDelete(tempDir); + Assert.assertTrue(deleted); + Assert.assertFalse(tempDir.exists()); + } + + @Test + public void testDirLevel() { + File dir = IOUtils.dirLevel(new File("/path/to/directory"), 1); + Assert.assertEquals(dir, new File("/path")); + + dir = IOUtils.dirLevel(new File("/path/to/directory"), 2); + Assert.assertEquals(dir, new File("/path/to")); + + dir = IOUtils.dirLevel(new File("/path/to/directory"), 3); + Assert.assertEquals(dir, new File("/path/to/directory")); + + dir = IOUtils.dirLevel(new File("/path/to/directory"), 4); + Assert.assertEquals(dir, new File("/path/to/directory")); + } + + @Test + public void testAbsolute() { + File dir = IOUtils.absolute(new File("/path/./to/./directory/.")); + Assert.assertEquals(dir, new File("/path/to/directory")); + + dir = IOUtils.absolute(new File("/")); + Assert.assertEquals(dir, new File("/")); + + dir = IOUtils.absolute(new File("/.")); + Assert.assertEquals(dir, new File("/")); + + dir = IOUtils.absolute(new File("/././.")); + Assert.assertEquals(dir, new File("/")); + + dir = IOUtils.absolute(new File("/./directory/.")); + Assert.assertEquals(dir, new File("/directory")); + + dir = IOUtils.absolute(new File("/./directory/./")); + Assert.assertEquals(dir, new File("/directory")); + + dir = IOUtils.absolute(new File("/./directory./")); + Assert.assertEquals(dir, new File("/directory.")); + + dir = IOUtils.absolute(new File("/./.directory/")); + Assert.assertEquals(dir, new File("/.directory")); + } + + @Test + public void testTail() throws IOException { + List lines = Arrays.asList( + "chr18_random 4262 3154410390 50 51", + "chr19_random 301858 3154414752 50 51", + "chr21_random 1679693 3154722662 50 51", + "chr22_random 257318 3156435963 50 51", + "chrX_random 1719168 3156698441 50 51"); + List tail = IOUtils.tail(new File(BaseTest.hg18Reference + ".fai"), 5); + Assert.assertEquals(tail.size(), 5); + for (int i = 0; i < 5; i++) + Assert.assertEquals(tail.get(i), lines.get(i)); + } + + @Test + public void testWriteSystemFile() throws IOException { + File temp = createTempFile("temp.", ".properties"); + try { + IOUtils.writeResource(new Resource("StingText.properties", null), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testWriteSystemTempFile() throws IOException { + File temp = IOUtils.writeTempResource(new Resource("StingText.properties", null)); + try { + Assert.assertTrue(temp.getName().startsWith("StingText"), "File does not start with 'StingText.': " + temp); + Assert.assertTrue(temp.getName().endsWith(".properties"), "File does not end with '.properties': " + temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMissingSystemFile() throws IOException { + File temp = createTempFile("temp.", ".properties"); + try { + IOUtils.writeResource(new Resource("MissingStingText.properties", null), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testWriteRelativeFile() throws IOException { + File temp = createTempFile("temp.", ".properties"); + try { + IOUtils.writeResource(new Resource("/StingText.properties", IOUtils.class), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testWriteRelativeTempFile() throws IOException { + File temp = IOUtils.writeTempResource(new Resource("/StingText.properties", IOUtils.class)); + try { + Assert.assertTrue(temp.getName().startsWith("StingText"), "File does not start with 'StingText.': " + temp); + Assert.assertTrue(temp.getName().endsWith(".properties"), "File does not end with '.properties': " + temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testMissingRelativeFile() throws IOException { + File temp = createTempFile("temp.", ".properties"); + try { + // Looking for /org/broadinstitute/sting/utils/file/StingText.properties + IOUtils.writeResource(new Resource("StingText.properties", IOUtils.class), temp); + } finally { + FileUtils.deleteQuietly(temp); + } + } + + @Test + public void testResourceProperties() { + Resource resource = new Resource("foo", Resource.class); + Assert.assertEquals(resource.getPath(), "foo"); + Assert.assertEquals(resource.getRelativeClass(), Resource.class); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/runtime/ProcessControllerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/runtime/ProcessControllerUnitTest.java new file mode 100644 index 000000000..7a31ceee0 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/runtime/ProcessControllerUnitTest.java @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.runtime; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringUtils; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.io.IOUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +public class ProcessControllerUnitTest extends BaseTest { + private static final String NL = String.format("%n"); + + @Test(timeOut = 60 * 1000) + public void testDestroyThreadLocal() throws InterruptedException { + for (int i = 0; i < 3; i++) { + final ProcessController controller = ProcessController.getThreadLocal(); + final ProcessSettings job = new ProcessSettings( + new String[] {"sh", "-c", "echo Hello World && sleep 600 && echo Goodbye"}); + job.getStdoutSettings().setBufferSize(-1); + + Thread t = new Thread(new Runnable() { + @Override + public void run() { + System.out.println("BACK: Starting on background thread"); + ProcessOutput result = controller.exec(job); + // Assert in background thread doesn't make it to main thread but does print a trace. + Assert.assertTrue(result.getExitValue() != 0, "Destroy-attempted job returned zero exit status"); + System.out.println("BACK: Background thread exiting"); + } + }); + + System.out.println("MAIN: Starting background thread"); + t.start(); + System.out.println("MAIN: Sleeping main thread 3s"); + Thread.sleep(3000); + System.out.println("MAIN: Destroying job"); + controller.tryDestroy(); + System.out.println("MAIN: Not waiting on background thread to exit"); + // Using standard java.io this was blocking on linux. + // TODO: try again with NIO. + //t.join(); + //System.out.println("MAIN: Background thread exited"); + } + } + + @Test + public void testReuseAfterError() { + ProcessController controller = new ProcessController(); + + ProcessSettings job; + + for (int i = 0; i < 3; i++) { + // Test bad command + job = new ProcessSettings(new String[] {"no_such_command"}); + try { + controller.exec(job); + } catch (ReviewedStingException e) { + /* Was supposed to throw an exception */ + } + + // Test exit != 0 + job = new ProcessSettings(new String[] {"cat", "non_existent_file"}); + int exitValue = controller.exec(job).getExitValue(); + Assert.assertTrue(exitValue != 0, "'cat' non existent file returned 0"); + + // Text success + job = new ProcessSettings(new String[] {"echo", "Hello World"}); + exitValue = controller.exec(job).getExitValue(); + Assert.assertEquals(exitValue, 0, "Echo failed"); + } + } + + @Test + public void testEnvironment() { + String key = "MY_NEW_VAR"; + String value = "value is here"; + + ProcessSettings job = new ProcessSettings(new String[] {"sh", "-c", "echo $"+key}); + job.getStdoutSettings().setBufferSize(-1); + job.setRedirectErrorStream(true); + + Map env = new HashMap(System.getenv()); + env.put(key, value); + job.setEnvironment(env); + + ProcessController controller = new ProcessController(); + ProcessOutput result = controller.exec(job); + int exitValue = result.getExitValue(); + + Assert.assertEquals(exitValue, 0, "Echo environment variable failed"); + Assert.assertEquals(result.getStdout().getBufferString(), value + NL, "Echo environment returned unexpected output"); + } + + @Test + public void testDirectory() throws IOException { + File dir = null; + try { + dir = IOUtils.tempDir("temp.", "").getCanonicalFile(); + + ProcessSettings job = new ProcessSettings(new String[] {"pwd"}); + job.getStdoutSettings().setBufferSize(-1); + job.setRedirectErrorStream(true); + job.setDirectory(dir); + + ProcessController controller = new ProcessController(); + ProcessOutput result = controller.exec(job); + int exitValue = result.getExitValue(); + + Assert.assertEquals(exitValue, 0, "Getting working directory failed"); + + Assert.assertEquals(result.getStdout().getBufferString(), dir.getAbsolutePath() + NL, + "Setting/getting working directory returned unexpected output"); + } finally { + FileUtils.deleteQuietly(dir); + } + } + + @Test + public void testReadStdInBuffer() { + String bufferText = "Hello from buffer"; + ProcessSettings job = new ProcessSettings(new String[] {"cat"}); + job.getStdoutSettings().setBufferSize(-1); + job.setRedirectErrorStream(true); + job.getStdinSettings().setInputBuffer(bufferText); + + ProcessController controller = new ProcessController(); + ProcessOutput output = controller.exec(job); + + Assert.assertEquals(output.getStdout().getBufferString(), bufferText, + "Unexpected output from cat stdin buffer"); + } + + @Test + public void testReadStdInFile() { + File input = null; + try { + String fileText = "Hello from file"; + input = IOUtils.writeTempFile(fileText, "stdin.", ".txt", null); + + ProcessSettings job = new ProcessSettings(new String[] {"cat"}); + job.getStdoutSettings().setBufferSize(-1); + job.setRedirectErrorStream(true); + job.getStdinSettings().setInputFile(input); + + ProcessController controller = new ProcessController(); + ProcessOutput output = controller.exec(job); + + Assert.assertEquals(output.getStdout().getBufferString(), fileText, + "Unexpected output from cat stdin file"); + } finally { + FileUtils.deleteQuietly(input); + } + } + + @Test + public void testWriteStdOut() { + ProcessSettings job = new ProcessSettings(new String[] {"echo", "Testing to stdout"}); + // Not going to call the System.setOut() for now. Just running a basic visual test. + job.getStdoutSettings().printStandard(true); + job.setRedirectErrorStream(true); + + System.out.println("testWriteStdOut: Writing two lines to std out..."); + ProcessController controller = new ProcessController(); + controller.exec(job); + job.setCommand(new String[]{"cat", "non_existent_file"}); + controller.exec(job); + System.out.println("testWriteStdOut: ...two lines should have been printed to std out"); + } + + @Test + public void testErrorToOut() throws IOException { + File outFile = null; + File errFile = null; + try { + outFile = BaseTest.createTempFile("temp", ""); + errFile = BaseTest.createTempFile("temp", ""); + + ProcessSettings job = new ProcessSettings(new String[]{"cat", "non_existent_file"}); + job.getStdoutSettings().setOutputFile(outFile); + job.getStdoutSettings().setBufferSize(-1); + job.getStderrSettings().setOutputFile(errFile); + job.getStderrSettings().setBufferSize(-1); + job.setRedirectErrorStream(true); + + ProcessOutput result = new ProcessController().exec(job); + int exitValue = result.getExitValue(); + + Assert.assertTrue(exitValue != 0, "'cat' non existent file returned 0"); + + String fileString, bufferString; + + fileString = FileUtils.readFileToString(outFile); + Assert.assertTrue(fileString.length() > 0, "Out file was length 0"); + + bufferString = result.getStdout().getBufferString(); + Assert.assertTrue(bufferString.length() > 0, "Out buffer was length 0"); + + Assert.assertFalse(result.getStdout().isBufferTruncated(), "Out buffer was truncated"); + Assert.assertEquals(bufferString.length(), fileString.length(), "Out buffer length did not match file length"); + + fileString = FileUtils.readFileToString(errFile); + Assert.assertEquals(fileString, "", "Unexpected output to err file"); + + bufferString = result.getStderr().getBufferString(); + Assert.assertEquals(bufferString, "", "Unexepected output to err buffer"); + } finally { + FileUtils.deleteQuietly(outFile); + FileUtils.deleteQuietly(errFile); + } + } + + @Test + public void testErrorToErr() throws IOException { + File outFile = null; + File errFile = null; + try { + outFile = BaseTest.createTempFile("temp", ""); + errFile = BaseTest.createTempFile("temp", ""); + + ProcessSettings job = new ProcessSettings(new String[]{"cat", "non_existent_file"}); + job.getStdoutSettings().setOutputFile(outFile); + job.getStdoutSettings().setBufferSize(-1); + job.getStderrSettings().setOutputFile(errFile); + job.getStderrSettings().setBufferSize(-1); + job.setRedirectErrorStream(false); + + ProcessOutput result = new ProcessController().exec(job); + int exitValue = result.getExitValue(); + + Assert.assertTrue(exitValue != 0, "'cat' non existent file returned 0"); + + String fileString, bufferString; + + fileString = FileUtils.readFileToString(errFile); + Assert.assertTrue(fileString.length() > 0, "Err file was length 0"); + + bufferString = result.getStderr().getBufferString(); + Assert.assertTrue(bufferString.length() > 0, "Err buffer was length 0"); + + Assert.assertFalse(result.getStderr().isBufferTruncated(), "Err buffer was truncated"); + Assert.assertEquals(bufferString.length(), fileString.length(), "Err buffer length did not match file length"); + + fileString = FileUtils.readFileToString(outFile); + Assert.assertEquals(fileString, "", "Unexpected output to out file"); + + bufferString = result.getStdout().getBufferString(); + Assert.assertEquals(bufferString, "", "Unexepected output to out buffer"); + } finally { + FileUtils.deleteQuietly(outFile); + FileUtils.deleteQuietly(errFile); + } + } + + private static final String TRUNCATE_TEXT = "Hello World"; + private static final byte[] TRUNCATE_OUTPUT_BYTES = (TRUNCATE_TEXT + NL).getBytes(); + + /** + * @return Test truncating content vs. not truncating (run at -1/+1 size) + */ + @DataProvider(name = "truncateSizes") + public Object[][] getTruncateBufferSizes() { + int l = TRUNCATE_OUTPUT_BYTES.length; + return new Object[][]{ + new Object[]{0, 0}, + new Object[]{l, l}, + new Object[]{l + 1, l}, + new Object[]{l - 1, l - 1} + }; + } + + @Test(dataProvider = "truncateSizes") + public void testTruncateBuffer(int truncateLen, int expectedLen) { + byte[] expected = Arrays.copyOf(TRUNCATE_OUTPUT_BYTES, expectedLen); + + String[] command = {"echo", TRUNCATE_TEXT}; + ProcessController controller = new ProcessController(); + + ProcessSettings job = new ProcessSettings(command); + job.getStdoutSettings().setBufferSize(truncateLen); + ProcessOutput result = controller.exec(job); + + int exitValue = result.getExitValue(); + + Assert.assertEquals(exitValue, 0, + String.format("Echo returned %d: %s", exitValue, TRUNCATE_TEXT)); + + byte[] bufferBytes = result.getStdout().getBufferBytes(); + + Assert.assertEquals(bufferBytes, expected, + String.format("Output buffer didn't match (%d vs %d)", expected.length, bufferBytes.length)); + + boolean truncated = result.getStdout().isBufferTruncated(); + + Assert.assertEquals(truncated, TRUNCATE_OUTPUT_BYTES.length > truncateLen, + "Unexpected buffer truncation result"); + } + + private static final String[] LONG_COMMAND = getLongCommand(); + private static final String LONG_COMMAND_STRING = StringUtils.join(LONG_COMMAND, " "); + private static final String LONG_COMMAND_DESCRIPTION = ""; + + @DataProvider(name = "echoCommands") + public Object[][] getEchoCommands() { + + new EchoCommand(new String[]{"echo", "Hello", "World"}, "Hello World" + NL); + new EchoCommand(new String[]{"echo", "'Hello", "World"}, "'Hello World" + NL); + new EchoCommand(new String[]{"echo", "Hello", "World'"}, "Hello World'" + NL); + new EchoCommand(new String[]{"echo", "'Hello", "World'"}, "'Hello World'" + NL); + + String[] longCommand = new String[LONG_COMMAND.length + 1]; + longCommand[0] = "echo"; + System.arraycopy(LONG_COMMAND, 0, longCommand, 1, LONG_COMMAND.length); + new EchoCommand(longCommand, LONG_COMMAND_STRING + NL) { + @Override + public String toString() { + return LONG_COMMAND_DESCRIPTION; + } + }; + + return TestDataProvider.getTests(EchoCommand.class); + } + + @Test(dataProvider = "echoCommands") + public void testEcho(EchoCommand script) throws IOException { + File outputFile = null; + try { + outputFile = BaseTest.createTempFile("temp", ""); + + ProcessSettings job = new ProcessSettings(script.command); + if (script.output != null) { + job.getStdoutSettings().setOutputFile(outputFile); + job.getStdoutSettings().setBufferSize(script.output.getBytes().length); + } + + ProcessOutput result = new ProcessController().exec(job); + int exitValue = result.getExitValue(); + + Assert.assertEquals(exitValue, 0, + String.format("Echo returned %d: %s", exitValue, script)); + + if (script.output != null) { + + String fileString = FileUtils.readFileToString(outputFile); + Assert.assertEquals(fileString, script.output, + String.format("Output file didn't match (%d vs %d): %s", + fileString.length(), script.output.length(), script)); + + String bufferString = result.getStdout().getBufferString(); + Assert.assertEquals(bufferString, script.output, + String.format("Output content didn't match (%d vs %d): %s", + bufferString.length(), script.output.length(), script)); + + Assert.assertFalse(result.getStdout().isBufferTruncated(), + "Output content was truncated: " + script); + } + } finally { + FileUtils.deleteQuietly(outputFile); + } + } + + @Test(expectedExceptions = ReviewedStingException.class) + public void testUnableToStart() { + ProcessSettings job = new ProcessSettings(new String[]{"no_such_command"}); + new ProcessController().exec(job); + } + + @DataProvider(name = "scriptCommands") + public Object[][] getScriptCommands() { + new ScriptCommand(true, "echo Hello World", "Hello World" + NL); + new ScriptCommand(false, "echo 'Hello World", null); + new ScriptCommand(false, "echo Hello World'", null); + new ScriptCommand(true, "echo 'Hello World'", "Hello World" + NL); + new ScriptCommand(true, "echo \"Hello World\"", "Hello World" + NL); + new ScriptCommand(false, "no_such_echo Hello World", null); + new ScriptCommand(true, "echo #", NL); + new ScriptCommand(true, "echo \\#", "#" + NL); + new ScriptCommand(true, "echo \\\\#", "\\#" + NL); + + new ScriptCommand(true, "echo " + LONG_COMMAND_STRING, LONG_COMMAND_STRING + NL) { + @Override + public String toString() { + return LONG_COMMAND_DESCRIPTION; + } + }; + + return TestDataProvider.getTests(ScriptCommand.class); + } + + @Test(dataProvider = "scriptCommands") + public void testScript(ScriptCommand script) throws IOException { + File scriptFile = null; + File outputFile = null; + try { + scriptFile = writeScript(script.content); + outputFile = BaseTest.createTempFile("temp", ""); + + ProcessSettings job = new ProcessSettings(new String[]{"sh", scriptFile.getAbsolutePath()}); + if (script.output != null) { + job.getStdoutSettings().setOutputFile(outputFile); + job.getStdoutSettings().setBufferSize(script.output.getBytes().length); + } + + ProcessOutput result = new ProcessController().exec(job); + int exitValue = result.getExitValue(); + + Assert.assertEquals(exitValue == 0, script.succeed, + String.format("Script returned %d: %s", exitValue, script)); + + if (script.output != null) { + + String fileString = FileUtils.readFileToString(outputFile); + Assert.assertEquals(fileString, script.output, + String.format("Output file didn't match (%d vs %d): %s", + fileString.length(), script.output.length(), script)); + + String bufferString = result.getStdout().getBufferString(); + Assert.assertEquals(bufferString, script.output, + String.format("Output content didn't match (%d vs %d): %s", + bufferString.length(), script.output.length(), script)); + + Assert.assertFalse(result.getStdout().isBufferTruncated(), + "Output content was truncated: " + script); + } + } finally { + FileUtils.deleteQuietly(scriptFile); + FileUtils.deleteQuietly(outputFile); + } + } + + private static String[] getLongCommand() { + // This command fails on some systems with a 4096 character limit when run via the old sh -c "echo ...", + // but works on the same systems when run via sh