From e2b41082af6828f64288f5c7571dea6499fde643 Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 2 Jun 2010 22:26:32 +0000 Subject: [PATCH] GATK now does automatic adaptor filtering in locus iterators (but not expt. downsampling iterator). General support for LocusIteratorFilters just like read filters but only applying at particular bases. Updated tools with new MD5 sums due to adaptor bases in their integrationtest data. Note that as a side effect here reads close to each other with odd orientations are also filtered out. Updated minor argument to VariantRecalibrator to change the qStep value on the command line git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3481 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/executive/WindowMaker.java | 2 +- ...agmentFilter.java => InAdaptorFilter.java} | 26 +++------- .../gatk/iterators/LocusIteratorByState.java | 44 +++++++++-------- .../gatk/iterators/LocusIteratorFilter.java | 42 ++++++++++++++++ .../sting/gatk/walkers/LocusWalker.java | 8 ++-- .../VariantGaussianMixtureModel.java | 3 +- .../VariantRecalibrator.java | 4 +- .../sting/utils/sam/ReadUtils.java | 48 ++++++++++++++----- .../org/broadinstitute/sting/WalkerTest.java | 4 +- .../providers/LocusViewTemplate.java | 24 +++++----- .../VariantAnnotatorIntegrationTest.java | 4 +- .../CallableLociWalkerIntegrationTest.java | 4 +- .../DepthOfCoverageIntegrationTest.java | 32 ++++++------- .../UnifiedGenotyperIntegrationTest.java | 10 ++-- .../RecalibrationWalkersIntegrationTest.java | 14 +++--- 15 files changed, 165 insertions(+), 104 deletions(-) rename java/src/org/broadinstitute/sting/gatk/filters/{TinyFragmentFilter.java => InAdaptorFilter.java} (75%) create mode 100755 java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorFilter.java diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 4deff1f9a..9eff72e40 100644 ---
a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -62,7 +62,7 @@ public class WindowMaker implements Iterable, I * @param iterator The data source for this window. * @param intervals The set of intervals over which to traverse. */ - public WindowMaker(StingSAMIterator iterator, List intervals, List filters, EnumSet discards ) { + public WindowMaker(StingSAMIterator iterator, List intervals, List filters, List discards ) { this.sourceInfo = iterator.getSourceInfo(); this.readIterator = iterator; diff --git a/java/src/org/broadinstitute/sting/gatk/filters/TinyFragmentFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/InAdaptorFilter.java similarity index 75% rename from java/src/org/broadinstitute/sting/gatk/filters/TinyFragmentFilter.java rename to java/src/org/broadinstitute/sting/gatk/filters/InAdaptorFilter.java index f307ee696..faffd8b45 100755 --- a/java/src/org/broadinstitute/sting/gatk/filters/TinyFragmentFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/InAdaptorFilter.java @@ -26,6 +26,8 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorFilter; /** * Filters out read pairs where the reads are so long relative to the over fragment size that they are @@ -61,24 +63,8 @@ import net.sf.samtools.SAMRecord; * @author depristo * @version 0.1 */ -public class TinyFragmentFilter implements SamRecordFilter { - public boolean filterOut(final SAMRecord rec) { - long isize = rec.getInferredInsertSize(); - if ( isize == 0 ) - return false; // unmapped pair -- cannot filter out - else { - long start = rec.getAlignmentStart(); - long end = rec.getAlignmentEnd(); - long mateStart = rec.getMateAlignmentStart(); - long mateEnd = rec.getAlignmentStart() + isize; - boolean 
bad = rec.getReadNegativeStrandFlag() ? start < mateStart : end > mateEnd; - System.out.printf("%s %d %d %d %d %d => %b%n", rec.getReadName(), start, end, mateStart, mateEnd, isize, bad); - if ( bad ) { - //System.out.printf("TinyFragment: " + rec.format()); - return true; - } else { - return false; - } - } +public class InAdaptorFilter implements LocusIteratorFilter { + public boolean filterOut(final SAMRecord rec, long basePos) { + return ReadUtils.readPairBaseOverlapType(rec, basePos) == ReadUtils.OverlapType.IN_ADAPTOR; } -} +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index bfb1adcb4..4492dabf2 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.*; +import net.sf.picard.filter.SamRecordFilter; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -41,12 +42,16 @@ import java.util.*; /** Iterator that traverses a SAM File, accumulating information on a per-locus basis */ public class LocusIteratorByState extends LocusIterator { - private static long discarded_adaptor_bases = 0L; - private static long discarded_overlapped_bases = 0L; + private static long discarded_bases = 0L; private static long observed_bases = 0L; - public enum Discard { ADAPTOR_BASES, SECOND_READ_OVERLAPPING_BASES } - public static final EnumSet NO_DISCARDS = EnumSet.noneOf(Discard.class); + // + // todo -- eric, add your UG filters here + // + //public enum Discard { ADAPTOR_BASES } + //public static final EnumSet NO_DISCARDS = EnumSet.noneOf(Discard.class); + + public static final List NO_FILTERS = Arrays.asList(); /** * the overflow tracker, which makes 
sure we get a limited number of warnings for locus pile-ups that @@ -259,7 +264,7 @@ public class LocusIteratorByState extends LocusIterator { //final boolean DEBUG2 = false && DEBUG; private Reads readInfo; private AlignmentContext nextAlignmentContext; - private EnumSet discards; + private List filters = new ArrayList(); // ----------------------------------------------------------------------------------------------------------------- // @@ -267,13 +272,13 @@ public class LocusIteratorByState extends LocusIterator { // // ----------------------------------------------------------------------------------------------------------------- public LocusIteratorByState(final Iterator samIterator, Reads readInformation ) { - this(samIterator, readInformation, NO_DISCARDS); + this(samIterator, readInformation, NO_FILTERS); } - public LocusIteratorByState(final Iterator samIterator, Reads readInformation, EnumSet discards ) { + public LocusIteratorByState(final Iterator samIterator, Reads readInformation, List filters ) { this.it = new PushbackIterator(samIterator); this.readInfo = readInformation; - this.discards = discards; + this.filters = filters; overflowTracker = new LocusOverflowTracker(readInformation.getMaxReadsAtLocus()); } @@ -401,19 +406,10 @@ public class LocusIteratorByState extends LocusIterator { // todo -- performance problem -- should be lazy, really for ( SAMRecordState state : readStates ) { if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { - ReadUtils.OverlapType overlapType = ReadUtils.readPairBaseOverlapType(state.getRead(), getLocation().getStart()); - if (discards.contains(Discard.ADAPTOR_BASES) && - overlapType == ReadUtils.OverlapType.IN_ADAPTOR ) { - discarded_adaptor_bases++; + if ( filterRead(state.getRead(), getLocation().getStart(), filters ) ) { + discarded_bases++; //printStatus("Adaptor bases", discarded_adaptor_bases); continue; - } else if ( 
discards.contains(Discard.SECOND_READ_OVERLAPPING_BASES) && - overlapType == ReadUtils.OverlapType.OVERLAPPING && - state.getRead().getSecondOfPairFlag() ) { - // only discard second bases in the base pair - discarded_overlapped_bases++; - //printStatus("Overlapping bases", discarded_overlapped_bases); - continue; } else { observed_bases++; pile.add(new PileupElement(state.getRead(), state.getReadOffset())); @@ -439,6 +435,16 @@ public class LocusIteratorByState extends LocusIterator { } } + private static boolean filterRead(SAMRecord rec, long pos, List filters) { + for ( LocusIteratorFilter filter : filters ) { + if ( filter.filterOut(rec, pos) ) { + return true; + } + } + + return false; + } + private void printStatus(final String title, long n) { if ( n % 10000 == 0 ) System.out.printf("%s %d / %d = %.2f%n", title, n, observed_bases, 100.0 * n / (observed_bases + 1)); diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorFilter.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorFilter.java new file mode 100755 index 000000000..103b63fba --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorFilter.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.iterators; + +import net.sf.samtools.SAMRecord; + +/** + * API for filtering SAMRecords at a particular base + */ +public interface LocusIteratorFilter { + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @param basePos the chromosomal position of the current locus + * @return true if the SAMRecord matches the filter (and should be removed), otherwise false + */ + public boolean filterOut(SAMRecord record, long basePos); +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 1d4dd149d..5d9eb5258 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -7,7 +7,9 @@ import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; +import org.broadinstitute.sting.gatk.filters.InAdaptorFilter; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorFilter; import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMRecord; @@ -60,8 +62,8 @@ public abstract class LocusWalker extends Walker 
getDiscards() { - return LocusIteratorByState.NO_DISCARDS; - //return EnumSet.of(LocusIteratorByState.Discard.ADAPTOR_BASES); + public List getDiscards() { + LocusIteratorFilter filter = new InAdaptorFilter(); + return Arrays.asList(filter); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java index 573bbc094..30c522522 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java @@ -368,13 +368,12 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } public final void outputOptimizationCurve( final VariantDatum[] data, final String outputPrefix, - final int desiredNumVariants, final Double[] FDRtranches ) { + final int desiredNumVariants, final Double[] FDRtranches, double QUAL_STEP ) { final int numVariants = data.length; final boolean[] markedVariant = new boolean[numVariants]; final double MAX_QUAL = 100.0; - final double QUAL_STEP = 0.1; final int NUM_BINS = (int) ((MAX_QUAL / QUAL_STEP) + 1); final int numKnownAtCut[] = new int[NUM_BINS]; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 998e25968..59cbe1349 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -89,6 +89,8 @@ public class VariantRecalibrator extends RodWalker [record in hand] + * s2 + * <-----------------------| + * + * s1, e1, and s2 are all in the record. 
From isize we can compute e2 as s1 + isize - 1 + * + * s2 + * |-----------------------> + * s1 e1 + * <-----------------------| [record in hand] + * + * Here we cannot calculate e2 since the record carries s2 and e1 + isize is s2 now! + * + * This makes the following code a little nasty, since we can only detect if a base is in the adaptor, but not + * if it overlaps the read. + * + * @param rec + * @param basePos + * @param adaptorLength + * @return + */ public static OverlapType readPairBaseOverlapType(final SAMRecord rec, long basePos, final int adaptorLength) { OverlapType state = OverlapType.NOT_OVERLAPPING; + long isize = rec.getInferredInsertSize(); - if ( isize > 0 ) { // we're not an unmapped pair -- cannot filter out + if ( isize != 0 ) { // we're not an unmapped pair -- cannot filter out long adaptorStart, adaptorEnd; long mateStart = rec.getMateAlignmentStart(); - long mateEnd = rec.getAlignmentStart() + isize; + long mateEnd = -1; if ( rec.getReadNegativeStrandFlag() ) { // we are on the negative strand, so our mate is on the positive strand @@ -123,20 +149,18 @@ public class ReadUtils { adaptorEnd = mateStart - 1; } else { // we are on the positive strand, so our mate is on the negative strand + mateEnd = rec.getAlignmentStart() + isize - 1; adaptorStart = mateEnd + 1; adaptorEnd = mateEnd + adaptorLength; } - boolean inMate = basePos >= mateStart && basePos <= mateEnd; - boolean inAdapator = basePos >= adaptorStart && basePos < adaptorEnd; + boolean inAdapator = basePos >= adaptorStart && basePos <= adaptorEnd; - - if ( inAdapator ) state = OverlapType.IN_ADAPTOR; - else if ( inMate ) state = OverlapType.OVERLAPPING; - -// if ( inMate || inAdapator ) -// System.out.printf("baseOverlapState: %s start=%d base=%d mateStart=%d mateStop=%d adaptorStart=%d adaptorEnd=%d => %s%n", -// rec.getReadName(), rec.getAlignmentStart(), basePos, mateStart, mateEnd, adaptorStart, adaptorEnd, state); + if ( inAdapator ) { + state = OverlapType.IN_ADAPTOR; +//
System.out.printf("baseOverlapState: %50s negStrand=%b base=%d start=%d stop=%d, mateStart=%d mateStop=%d adaptorStart=%d adaptorEnd=%d isize=%d => %s%n", +// rec.getReadName(), rec.getReadNegativeStrandFlag(), basePos, rec.getAlignmentStart(), rec.getAlignmentEnd(), mateStart, mateEnd, adaptorStart, adaptorEnd, isize, state); + } } return state; diff --git a/java/test/org/broadinstitute/sting/WalkerTest.java b/java/test/org/broadinstitute/sting/WalkerTest.java index 8d3bc4b38..aa1391447 100755 --- a/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/java/test/org/broadinstitute/sting/WalkerTest.java @@ -120,8 +120,8 @@ public class WalkerTest extends BaseTest { System.out.printf("##### Test %s is going fail #####%n", name); String pathToExpectedMD5File = getMD5Path(expectedMD5, "[No DB file found]"); String pathToFileMD5File = getMD5Path(filemd5sum, "[No DB file found]"); - System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); - System.out.printf("##### Path to expected file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); + System.out.printf("##### Path to expected file (MD5=%s): %s%n", expectedMD5, pathToExpectedMD5File); + System.out.printf("##### Path to calculated file (MD5=%s): %s%n", filemd5sum, pathToFileMD5File); System.out.printf("##### Diff command: diff %s %s%n", pathToExpectedMD5File, pathToFileMD5File); // todo -- add support for simple inline display of the first N differences for text file diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index cb9a42892..e6abcdbbb 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -49,7 +49,7 @@ public abstract class LocusViewTemplate extends BaseTest { GenomeLoc shardBounds = 
GenomeLocParser.createGenomeLoc("chr1", 1, 5); Shard shard = new LocusShard(Collections.singletonList(shardBounds)); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); @@ -65,7 +65,7 @@ public abstract class LocusViewTemplate extends BaseTest { GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); Shard shard = new LocusShard(Collections.singletonList(shardBounds)); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); @@ -80,7 +80,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -94,7 +94,7 @@ public abstract class 
LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -108,7 +108,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -122,7 +122,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); 
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -136,7 +136,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -151,7 +151,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -170,7 +170,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new 
ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -189,7 +189,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -210,7 +210,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -238,7 +238,7 @@ public abstract class 
LocusViewTemplate extends BaseTest { read07, read08, read09, read10, read11, read12); Shard shard = new LocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_DISCARDS); + WindowMaker windowMaker = new WindowMaker(iterator,shard.getGenomeLocs(),new ArrayList(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); LocusView view = createView(dataProvider); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index a0608ed38..ac4294efe 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("08fa964f33cdf934af8cb46a9cc03ff2")); + Arrays.asList("a3d781ed67bd80b02eeeba4407af93d1")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + 
"low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("fc240f58f073071a6edf4d11b74fb493")); + Arrays.asList("bfbdd00135812778138443ff1c45ac1f")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java index b0daa3be2..fee000ebe 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java @@ -36,7 +36,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest { public void testCallableLociWalker1() { String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("b3a273984da6744d6de4ca0ab3eb759b", "33b8b285a738d8d5daf6e98af698a4eb")); + Arrays.asList("884c9c2d96419d990a708d2bd98fcefa", "ed4c255bb78313b8e7982127caf3d6c4")); executeTest("formatBed", spec); } @@ -52,7 +52,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest { public void testCallableLociWalker3() { String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("3fee7d7d0e305f439db29b4e641d1c20", "99c54acdad7e81ccf219b8a70cd26917")); + Arrays.asList("86bd1a5f79356b3656412c4b1c60709a", "6fefb144a60b89c27293ce5ca6e10e6a")); executeTest("formatBed lots of arguments", spec); } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java index 
ce26569af..2bc90ed85 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java @@ -57,25 +57,25 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(cmd,0, new ArrayList()); // now add the expected files that get generated - spec.addAuxFile("fc742e346be2344557cf8c039f467508", baseOutputFile); - spec.addAuxFile("e58b701b01ec0dbe75c146295434ba3b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts")); + spec.addAuxFile("494c174ea0773bda98297a0cbdc188eb", baseOutputFile); + spec.addAuxFile("9df5e7e07efeb34926c94a724714c219", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts")); spec.addAuxFile("b9a7748e5aec4dc06daed893c901c00d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions")); - spec.addAuxFile("848e556ec7e03e9b0398d189d7cbb4ad", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics")); - spec.addAuxFile("acd3dfb97ef64ea6547c001640acd194", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); - spec.addAuxFile("cac8e7a688d9bbe781232c61091d3237", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); - spec.addAuxFile("73c31412b75dc7014549096de7c0c609", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); - spec.addAuxFile("38fb89e1bb52d0342f97f72e86956b79", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts")); - spec.addAuxFile("f9f2941ee39577ac2f80668e7f6b3d4b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); + spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", 
createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics")); + spec.addAuxFile("ca95d2508366d32bf91bf0b0009a023a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary")); + spec.addAuxFile("f6dbd74d32a48abe71ce08d300bce983", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics")); + spec.addAuxFile("7962a7c09c43ff8b339fa52bce51bfca", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary")); + spec.addAuxFile("b82846df660f0aac8429aec57c2a62d6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts")); + spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions")); spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics")); - spec.addAuxFile("8c5266ef3c0031d3b0311839b7f59245", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); - spec.addAuxFile("cc7ee5075a932dba486e78824ca34202", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); - spec.addAuxFile("39afa53bae210f4684951b908aa36c7d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); - spec.addAuxFile("529353375d23c529228b38119c51e269", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); + spec.addAuxFile("755463d88222c81b84f99615e7b4cfd6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary")); + spec.addAuxFile("e70952f241eebb9b5448f2e7cb288131", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics")); + spec.addAuxFile("51e4c04dfcb4a20c552ca6f013977fa8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary")); + spec.addAuxFile("d53431022f7387fe9ac47814ab1fcd88", 
createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts")); spec.addAuxFile("650ee3714da7fbad7832c9d4ad49eb51", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions")); - spec.addAuxFile("925cc5b49286e0222bce6251d1baafc7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); - spec.addAuxFile("7a2fc8a2fba91adfbdc90dcc0e1ef79b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); - spec.addAuxFile("f3315551081331bc322c53b11412d707", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); - spec.addAuxFile("455f9f0c4461bcf0b231fef704266881", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); + spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics")); + spec.addAuxFile("a52395c883ce8f1a62444d214fe37e88", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary")); + spec.addAuxFile("a50011571334f17e950ad3ed1149e350", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics")); + spec.addAuxFile("f12a5f97b69718333c4987e3beb98f06", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary")); execute("testBaseOutputNoFiltering",spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index bfeca54c1..8fd67f142 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new 
WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("d9d99c7ce4ea63a907183893de1dd905")); + Arrays.asList("4e403194fd00552804a0907bf905cffb")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1, - Arrays.asList("23dfd7747ec6149e59abd753d2a8b00c")); + Arrays.asList("987aaf0268a364d35e4375bab2d69c9e")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -85,8 +85,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "0ac7ab893a3f550cb1b8c34f28baedf6" ); - e.put( "-all_bases", "40520c3020f6abcb60e8b632d9614554" ); + e.put( "-genotype", "96106a735876dc4ca9aa839610bee56f" ); + e.put( "-all_bases", "85b7bd300cc6852e25be430dd02e789f" ); e.put( "--min_base_quality_score 26", "ecc1b0dd618eae9b9f62db2742ac3306" ); e.put( "--min_mapping_quality_score 26", "75bd53d5070f1146350d633321a165e3" ); e.put( "--max_mismatches_in_40bp_window 5", "8e1236b7f0f6c19a0be808b2d44e3255" ); @@ -127,7 +127,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -beagle %s" + " -L 1:10,023,400-10,024,000", 1, - Arrays.asList("5077d80806e24865b5c12553843a5f85")); + Arrays.asList("0a6f06f1900c2b965a1d0ac413df8151")); executeTest(String.format("testOtherOutput"), spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java 
b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 06af44530..e19d2fdb5 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -17,10 +17,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e5b2d5a2f4283718dae678cbc84be847" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7a23c3ffc3917f95117971d642bbeb56" ); e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "89084b43b824f9e3c5e2afdfe0930542"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7d6428a76e07ed4b99351aa4df89634d" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "6ede6fc840c4e5070a58a919b48e7504" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a582a86adffee2c9ee79a00b424a6cd9" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -50,10 +50,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "fd5eca3a40a971d5eabf9ab792bd0295" ); e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "e5d9fc628dcf4f0ae115a6e6cc5423fe"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" ); - e.put( validationDataLocation + 
"NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "4cd060de1e4e6d21fcd1c7a4ce7824f5" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "b00679024ce8dcaf611907109a7e9a27" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -81,7 +81,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesUseOriginalQuals() { HashMap e = new HashMap(); - e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "26ae1bede4f337901b6194753f6cf914"); + e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "784488e7024f3e5398ca462d6b8f97c4"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -303,7 +303,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "850f2a2d5bc94cc22b3b038b424252c6" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "481de3cbecad59c00cc148bbcd279e60" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -329,7 +329,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "aa38b04c6b58badabb6b09d590284a2a" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "9733d0c5954dcdf5b9bb0ad0b6eb8232" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();