bug fixes to LIBS and LIBH following ultra-aggressive regression testing across 454, solid, and solexa
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1558 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0721c450c2
commit
d9588e6083
|
|
@ -323,8 +323,8 @@
|
|||
<target name="clean" description="clean up">
|
||||
<delete dir="out"/>
|
||||
<delete dir="build"/>
|
||||
<delete dir="${dist}"/>
|
||||
<delete dir="lib"/>
|
||||
<delete dir="staging"/>
|
||||
<delete dir="${dist}"/>
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
|||
|
|
@ -358,7 +358,8 @@ public class GenomeAnalysisEngine {
|
|||
argCollection.downsampleCoverage,
|
||||
!argCollection.unsafe,
|
||||
filters,
|
||||
argCollection.readMaxPileup);
|
||||
argCollection.readMaxPileup,
|
||||
walker.includeReadsWithDeletionAtLoci() );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -31,6 +31,17 @@ public class Reads {
|
|||
private Boolean beSafe = null;
|
||||
private List<SamRecordFilter> supplementalFilters = null;
|
||||
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
||||
private boolean includeReadsWithDeletionAtLoci = false;
|
||||
|
||||
|
||||
/**
|
||||
* Return true if the walker wants to see reads that contain deletions when looking at locus pileups
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean includeReadsWithDeletionAtLoci() {
|
||||
return includeReadsWithDeletionAtLoci;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of the files acting as sources of reads.
|
||||
|
|
@ -110,7 +121,8 @@ public class Reads {
|
|||
Integer downsampleCoverage,
|
||||
Boolean beSafe,
|
||||
List<SamRecordFilter> supplementalFilters,
|
||||
int maximumReadsAtLocus) {
|
||||
int maximumReadsAtLocus,
|
||||
boolean includeReadsWithDeletionAtLoci) {
|
||||
this.readsFiles = samFiles;
|
||||
this.validationStringency = strictness;
|
||||
this.downsamplingFraction = downsampleFraction;
|
||||
|
|
@ -118,5 +130,6 @@ public class Reads {
|
|||
this.beSafe = beSafe;
|
||||
this.supplementalFilters = supplementalFilters;
|
||||
this.maximumReadsAtLocus = maximumReadsAtLocus;
|
||||
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -142,4 +142,39 @@ public class AlignmentContext {
|
|||
reads = downsampledReads;
|
||||
offsets = downsampledOffsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns only the reads in ac that do not contain spanning deletions of this locus
|
||||
*
|
||||
* @param ac
|
||||
* @return
|
||||
*/
|
||||
public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) {
|
||||
return subsetDeletions( ac, true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns only the reads in ac that do contain spanning deletions of this locus
|
||||
*
|
||||
* @param ac
|
||||
* @return
|
||||
*/
|
||||
public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) {
|
||||
return subsetDeletions( ac, false );
|
||||
}
|
||||
|
||||
private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) {
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(ac.getReads().size());
|
||||
ArrayList<Integer> offsets = new ArrayList<Integer>(ac.getReads().size());
|
||||
for ( int i = 0; i < ac.getReads().size(); i++ ) {
|
||||
SAMRecord read = ac.getReads().get(i);
|
||||
int offset = ac.getOffsets().get(i);
|
||||
if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) {
|
||||
reads.add(read);
|
||||
offsets.add(offset);
|
||||
}
|
||||
}
|
||||
|
||||
return new AlignmentContext(ac.getLocation(), reads, offsets);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -218,13 +218,15 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||
|
||||
// todo -- performance problem -- should be lazy, really
|
||||
LinkedList<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||
LinkedList<Integer> offsets = new LinkedList<Integer>();
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
|
||||
ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
|
||||
for ( SAMRecordState state : readStates ) {
|
||||
// todo -- enable D operation in alignment contexts
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D ) {
|
||||
reads.add(state.getRead());
|
||||
offsets.add(state.getReadOffset());
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() ) {
|
||||
reads.add(state.getRead());
|
||||
offsets.add(-1);
|
||||
}
|
||||
}
|
||||
GenomeLoc loc = getLocation();
|
||||
|
|
@ -236,7 +238,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// printState();
|
||||
//}
|
||||
|
||||
return new AlignmentContext(loc, reads, offsets);
|
||||
return reads.size() == 0 ? next() : new AlignmentContext(loc, reads, offsets);
|
||||
}
|
||||
|
||||
private void collectPendingReads(final int maximumPileupSize) {
|
||||
|
|
|
|||
|
|
@ -29,18 +29,64 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Display the depth of coverage at a given locus.
|
||||
*/
|
||||
public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>> {
|
||||
@Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false)
|
||||
public boolean suppressPrinting = false;
|
||||
enum printType {
|
||||
NONE,
|
||||
COMPACT,
|
||||
DETAILED
|
||||
}
|
||||
|
||||
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
|
||||
public printType printStyle = printType.COMPACT;
|
||||
|
||||
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
|
||||
public boolean excludeDeletionsInCoverage = false;
|
||||
|
||||
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
|
||||
public int excludeMAPQBelowThis = -1;
|
||||
|
||||
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
|
||||
|
||||
public void initialize() {
|
||||
switch ( printStyle ) {
|
||||
case COMPACT:
|
||||
out.printf("locus depth%n");
|
||||
break;
|
||||
case DETAILED:
|
||||
out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( !suppressPrinting )
|
||||
out.printf("%s: %d%n", context.getLocation(), context.getReads().size() );
|
||||
return context.getReads().size();
|
||||
int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
|
||||
|
||||
for ( int i = 0; i < context.getReads().size(); i++ ) {
|
||||
SAMRecord read = context.getReads().get(i);
|
||||
int offset = context.getOffsets().get(i);
|
||||
|
||||
if ( read.getMappingQuality() < excludeMAPQBelowThis ) nBadMAPQReads++;
|
||||
else if ( offset == -1 ) nDeletionReads++;
|
||||
else nCleanReads++;
|
||||
}
|
||||
|
||||
int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
|
||||
|
||||
switch ( printStyle ) {
|
||||
case COMPACT:
|
||||
out.printf("%s %8d%n", context.getLocation(), nTotalReads);
|
||||
break;
|
||||
case DETAILED:
|
||||
out.printf("%s %8d %8d %8d %8d%n", context.getLocation(), nTotalReads, nCleanReads, nDeletionReads, nBadMAPQReads);
|
||||
break;
|
||||
}
|
||||
|
||||
return nTotalReads;
|
||||
}
|
||||
|
||||
public boolean isReduceByInterval() {
|
||||
|
|
|
|||
|
|
@ -1,38 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
// Null traversal. For ATK performance measuring.
|
||||
// j.maguire 3-7-2009
|
||||
|
||||
public class NullWalker extends LocusWalker<Integer, Integer> {
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Given result of map function
|
||||
public Integer reduceInit()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
public Integer reduce(Integer value, Integer sum)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
public void onTraversalDone() {
|
||||
}
|
||||
}
|
||||
|
|
@ -43,6 +43,24 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
return GenomeAnalysisEngine.instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* (conceptual static) method that states whether you want to see reads piling up at a locus
|
||||
* that contain a deletion at the locus.
|
||||
*
|
||||
* ref: ATCTGA
|
||||
* read1: ATCTGA
|
||||
* read2: AT--GA
|
||||
*
|
||||
* Normally, the locus iterator only returns a list of read1 at this locus at position 3, but
|
||||
* if this function returns true, then the system will return (read1, read2) with offsets
|
||||
* of (3, -1). The -1 offset indicates a deletion in the read.
|
||||
*
|
||||
* @return false if you don't want to see deletions, or true if you do
|
||||
*/
|
||||
public boolean includeReadsWithDeletionAtLoci() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void initialize() { }
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -461,6 +461,15 @@ public class Utils {
|
|||
return count;
|
||||
}
|
||||
|
||||
public static <T> int countOccurrences(T x, List<T> l) {
|
||||
int count = 0;
|
||||
for ( T y : l ) {
|
||||
if ( x.equals(y) ) count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
public static byte listMaxByte(List<Byte> quals) {
|
||||
if ( quals.size() == 0 ) return 0;
|
||||
byte m = quals.get(0);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.io.File;
|
||||
|
||||
public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
||||
private static String root = "-L 1:10,164,500-10,164,520 -R /broad/1KG/reference/human_b36_both.fasta -T DepthOfCoverage -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
|
||||
static HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
static {
|
||||
expectations.put("-s COMPACT -minMAPQ 1", "a6b2005e37f48545e9604e0f809cf618");
|
||||
expectations.put("-s COMPACT", "c3cd44717487ad3c7d6d969583d9bb8c");
|
||||
expectations.put("-s COMPACT -ed", "73aacc9febea393a4c3760bd6c70a557");
|
||||
expectations.put("-s NONE", "628201adcef01e8bc3e9cdf96b86003b");
|
||||
expectations.put("-s NONE -minMAPQ 1", "2425ccbcfdfb9e9a98fcf30c6f2bfcdd");
|
||||
expectations.put("-s NONE -minMAPQ 100", "09eaf5a4c811e4ac74de5d179a9ffbe7");
|
||||
expectations.put("-s DETAILED -minMAPQ 1", "9059436a57f8c28ef16ee9b2c7cd3ebb");
|
||||
expectations.put("-s DETAILED", "fac3bb643f34eb9d90dac0e0358b55ab");
|
||||
expectations.put("-s DETAILED -ed", "4d991221aa5e206de11878e88c380de1");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDepthOfCoverage1() {
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testDepthOfCoverage1", spec);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDepthOfCoverage454() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -s DETAILED -o %s",
|
||||
1, // just one output file
|
||||
Arrays.asList("5c94916ec193ca825c681dd0175c6164"));
|
||||
executeTest("testDepthOfCoverage454", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -49,18 +49,19 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
String md5 = entry.getValue();
|
||||
String paramsFile = paramsFiles.get(bam);
|
||||
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -T TableRecalibration" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-20,000,000" +
|
||||
" --outputBam %s" +
|
||||
" --params " + paramsFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testTableRecalibrator1", spec);
|
||||
if ( paramsFile != null ) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -T TableRecalibration" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-20,000,000" +
|
||||
" --outputBam %s" +
|
||||
" --params " + paramsFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testTableRecalibrator1", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue