bug fixes to LIBS and LIBH following ultra-aggressive regression testing across 454, solid, and solexa

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1558 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-09-09 15:36:12 +00:00
parent 0721c450c2
commit d9588e6083
11 changed files with 198 additions and 62 deletions

View File

@ -323,8 +323,8 @@
<!-- Remove every generated directory so the next build starts from scratch.
     NOTE: the merged diff showed "${dist}" deleted twice; one copy removed. -->
<target name="clean" description="clean up">
<delete dir="out"/>
<delete dir="build"/>
<delete dir="lib"/>
<delete dir="staging"/>
<delete dir="${dist}"/>
</target>
</project>

View File

@ -358,7 +358,8 @@ public class GenomeAnalysisEngine {
argCollection.downsampleCoverage,
!argCollection.unsafe,
filters,
argCollection.readMaxPileup);
argCollection.readMaxPileup,
walker.includeReadsWithDeletionAtLoci() );
}
/**

View File

@ -31,6 +31,17 @@ public class Reads {
private Boolean beSafe = null;
private List<SamRecordFilter> supplementalFilters = null;
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
private boolean includeReadsWithDeletionAtLoci = false;
/**
 * Return true if the walker wants to see reads that contain deletions when looking at locus pileups.
 *
 * @return true if reads whose deletion spans the current locus should be included in pileups,
 *         false (the default) to omit them
 */
public boolean includeReadsWithDeletionAtLoci() {
return includeReadsWithDeletionAtLoci;
}
/**
* Gets a list of the files acting as sources of reads.
@ -110,7 +121,8 @@ public class Reads {
Integer downsampleCoverage,
Boolean beSafe,
List<SamRecordFilter> supplementalFilters,
int maximumReadsAtLocus) {
int maximumReadsAtLocus,
boolean includeReadsWithDeletionAtLoci) {
this.readsFiles = samFiles;
this.validationStringency = strictness;
this.downsamplingFraction = downsampleFraction;
@ -118,5 +130,6 @@ public class Reads {
this.beSafe = beSafe;
this.supplementalFilters = supplementalFilters;
this.maximumReadsAtLocus = maximumReadsAtLocus;
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
}
}

View File

@ -142,4 +142,39 @@ public class AlignmentContext {
reads = downsampledReads;
offsets = downsampledOffsets;
}
/**
 * Returns only the reads in ac that do not contain spanning deletions of this locus.
 *
 * @param ac the pileup to subset; not modified
 * @return a new AlignmentContext at the same location holding only the reads with a
 *         real offset into this locus (offset != -1)
 */
public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) {
return subsetDeletions( ac, true );
}
/**
 * Returns only the reads in ac that do contain spanning deletions of this locus.
 *
 * @param ac the pileup to subset; not modified
 * @return a new AlignmentContext at the same location holding only the reads whose
 *         deletion spans this locus (marked by offset == -1)
 */
public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) {
return subsetDeletions( ac, false );
}
/**
 * Shared implementation for the spanning-deletion subsetting helpers.
 *
 * @param ac                    pileup to subset; left untouched
 * @param readsWithoutDeletions true to keep only reads actually aligned at this locus
 *                              (offset != -1), false to keep only the spanning
 *                              deletions (offset == -1)
 * @return a fresh AlignmentContext at ac's location holding the selected reads/offsets
 */
private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) {
    final int nReads = ac.getReads().size();
    ArrayList<SAMRecord> keptReads = new ArrayList<SAMRecord>(nReads);
    ArrayList<Integer> keptOffsets = new ArrayList<Integer>(nReads);
    for ( int i = 0; i < nReads; i++ ) {
        int offset = ac.getOffsets().get(i);
        // An offset of -1 marks a read whose deletion spans this locus; keep the
        // read exactly when its deletion status matches what the caller asked for.
        if ( (offset != -1) == readsWithoutDeletions ) {
            keptReads.add(ac.getReads().get(i));
            keptOffsets.add(offset);
        }
    }
    return new AlignmentContext(ac.getLocation(), keptReads, keptOffsets);
}
}

View File

@ -218,13 +218,15 @@ public class LocusIteratorByState extends LocusIterator {
collectPendingReads(readInfo.getMaxReadsAtLocus());
// todo -- performance problem -- should be lazy, really
LinkedList<SAMRecord> reads = new LinkedList<SAMRecord>();
LinkedList<Integer> offsets = new LinkedList<Integer>();
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
for ( SAMRecordState state : readStates ) {
// todo -- enable D operation in alignment contexts
if ( state.getCurrentCigarOperator() != CigarOperator.D ) {
reads.add(state.getRead());
offsets.add(state.getReadOffset());
} else if ( readInfo.includeReadsWithDeletionAtLoci() ) {
reads.add(state.getRead());
offsets.add(-1);
}
}
GenomeLoc loc = getLocation();
@ -236,7 +238,7 @@ public class LocusIteratorByState extends LocusIterator {
// printState();
//}
return new AlignmentContext(loc, reads, offsets);
return reads.size() == 0 ? next() : new AlignmentContext(loc, reads, offsets);
}
private void collectPendingReads(final int maximumPileupSize) {

View File

@ -29,18 +29,64 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Pair;
import net.sf.samtools.SAMRecord;
/**
* Display the depth of coverage at a given locus.
*/
public class DepthOfCoverageWalker extends LocusWalker<Integer, Pair<Long, Long>> {
// NOTE(review): this flag appears unused by the current map() implementation -- confirm
// before relying on it (printStyle=NONE looks like the newer way to silence output).
@Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false)
public boolean suppressPrinting = false;
// Output verbosity for the per-locus coverage report.
// (Java convention would be "PrintType", but renaming would break existing callers.)
enum printType {
NONE,       // print nothing per locus
COMPACT,    // locus and total depth only
DETAILED    // locus plus clean / deletion / low-MAPQ breakdown
}
@Argument(fullName="printStyle", shortName = "s", doc="Printing style: NONE, COMPACT, or DETAILED", required=false)
public printType printStyle = printType.COMPACT;
// When true, reads whose deletion spans the locus are left out of the reported depth.
@Argument(fullName="excludeDeletions", shortName = "ed", doc="If true, we will exclude reads with deletions at a locus in coverage",required=false)
public boolean excludeDeletionsInCoverage = false;
// Reads with MAPQ strictly below this value are counted separately and never included
// in depth. The default of -1 disables the filter (MAPQ values are non-negative).
@Argument(fullName="minMAPQ", shortName = "minMAPQ", doc="If provided, we will exclude reads with MAPQ < this value at a locus in coverage",required=false)
public int excludeMAPQBelowThis = -1;
// Ask the engine for deletion-spanning reads unless the user opted out with -ed.
public boolean includeReadsWithDeletionAtLoci() { return ! excludeDeletionsInCoverage; }
/**
 * Prints the column-header line that matches the selected print style.
 * NONE deliberately emits no header at all.
 */
public void initialize() {
    if ( printStyle == printType.COMPACT ) {
        out.printf("locus depth%n");
    } else if ( printStyle == printType.DETAILED ) {
        out.printf("locus nCleanReads nDeletionReads nLowMAPQReads%n");
    }
}
/**
 * Tallies the pileup at this locus, bucketing each read as clean, spanning-deletion
 * (offset == -1, present only when includeReadsWithDeletionAtLoci() is true), or
 * low-MAPQ, then prints one row in the configured style.
 *
 * Note: the merged diff left the superseded body (unconditional print + early
 * return) above the new logic, making the new code unreachable; the stale lines
 * have been removed so the method compiles and behaves as the new version intends.
 *
 * @param tracker reference metadata at this locus (unused)
 * @param ref     reference bases at this locus (unused)
 * @param context the read pileup at this locus
 * @return the reported depth: clean reads, plus deletion reads unless -ed was given;
 *         low-MAPQ reads are never counted in the total
 */
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    int nCleanReads = 0, nBadMAPQReads = 0, nDeletionReads = 0;
    for ( int i = 0; i < context.getReads().size(); i++ ) {
        SAMRecord read = context.getReads().get(i);
        int offset = context.getOffsets().get(i);
        if ( read.getMappingQuality() < excludeMAPQBelowThis ) nBadMAPQReads++;
        else if ( offset == -1 ) nDeletionReads++;
        else nCleanReads++;
    }
    int nTotalReads = nCleanReads + (excludeDeletionsInCoverage ? 0 : nDeletionReads);
    switch ( printStyle ) {
        case COMPACT:
            out.printf("%s %8d%n", context.getLocation(), nTotalReads);
            break;
        case DETAILED:
            // NOTE(review): this row prints 5 columns but the DETAILED header in
            // initialize() names only 4 -- confirm whether the header should also
            // list a total-depth column.
            out.printf("%s %8d %8d %8d %8d%n", context.getLocation(), nTotalReads, nCleanReads, nDeletionReads, nBadMAPQReads);
            break;
    }
    return nTotalReads;
}
public boolean isReduceByInterval() {

View File

@ -1,38 +0,0 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
// Null traversal: a do-nothing LocusWalker used to measure raw traversal-engine overhead.
// j.maguire 3-7-2009
public class NullWalker extends LocusWalker<Integer, Integer> {
// No setup required for the null traversal.
public void initialize() {
}
// Accept every locus so the engine does the full traversal work.
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return true; // We are keeping all the reads
}
// Constant map result: touches nothing in the AlignmentContext by design.
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context)
{
return 1;
}
// Seed value for the reduction.
public Integer reduceInit()
{
return 0;
}
// Discards all results -- the reduce cost stays constant regardless of input.
public Integer reduce(Integer value, Integer sum)
{
return 0;
}
// Nothing to report at the end of the traversal.
public void onTraversalDone() {
}
}

View File

@ -43,6 +43,24 @@ public abstract class Walker<MapType, ReduceType> {
return GenomeAnalysisEngine.instance;
}
/**
 * (Conceptually static) method stating whether this walker wants to see reads piling up
 * at a locus when those reads contain a deletion at that locus.
 *
 * ref:   ATCTGA
 * read1: ATCTGA
 * read2: AT--GA
 *
 * Normally, the locus iterator only returns a list of read1 at position 3 of this locus,
 * but if this function returns true, then the system will return (read1, read2) with
 * offsets of (3, -1). The -1 offset indicates a deletion in the read.
 *
 * @return false (the default) if you don't want to see deletions, or true if you do;
 *         walkers override this to opt in
 */
public boolean includeReadsWithDeletionAtLoci() {
return false;
}
public void initialize() { }
/**

View File

@ -461,6 +461,15 @@ public class Utils {
return count;
}
/**
 * Counts how many elements of l are equal to x (by equals).
 *
 * @param x the element to count; may be null (null matches null elements)
 * @param l the list to scan; must not be null
 * @return the number of elements of l equal to x
 */
public static <T> int countOccurrences(T x, List<T> l) {
    // Delegate to the standard library rather than hand-rolling the loop; unlike
    // the original x.equals(y) loop, Collections.frequency is also null-safe in x.
    return java.util.Collections.frequency(l, x);
}
public static byte listMaxByte(List<Byte> quals) {
if ( quals.size() == 0 ) return 0;
byte m = quals.get(0);

View File

@ -0,0 +1,49 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
import java.util.Arrays;
import java.util.List;
import java.io.File;
/**
 * MD5-based regression tests for the DepthOfCoverage walker over a small chr1
 * interval, exercising the printing styles and read-filtering arguments.
 */
public class DepthOfCoverageIntegrationTest extends WalkerTest {
    // Command-line stem shared by every case in testDepthOfCoverage1.
    private static String root = "-L 1:10,164,500-10,164,520 -R /broad/1KG/reference/human_b36_both.fasta -T DepthOfCoverage -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";

    // Maps extra walker arguments to the expected MD5 of the resulting output file.
    static HashMap<String, String> expectations = new HashMap<String, String>();
    static {
        expectations.put("-s COMPACT -minMAPQ 1", "a6b2005e37f48545e9604e0f809cf618");
        expectations.put("-s COMPACT", "c3cd44717487ad3c7d6d969583d9bb8c");
        expectations.put("-s COMPACT -ed", "73aacc9febea393a4c3760bd6c70a557");
        expectations.put("-s NONE", "628201adcef01e8bc3e9cdf96b86003b");
        expectations.put("-s NONE -minMAPQ 1", "2425ccbcfdfb9e9a98fcf30c6f2bfcdd");
        expectations.put("-s NONE -minMAPQ 100", "09eaf5a4c811e4ac74de5d179a9ffbe7");
        expectations.put("-s DETAILED -minMAPQ 1", "9059436a57f8c28ef16ee9b2c7cd3ebb");
        expectations.put("-s DETAILED", "fac3bb643f34eb9d90dac0e0358b55ab");
        expectations.put("-s DETAILED -ed", "4d991221aa5e206de11878e88c380de1");
    }

    @Test
    public void testDepthOfCoverage1() {
        // Run every argument combination against its recorded MD5.
        for ( String extraArgs : expectations.keySet() ) {
            String expectedMD5 = expectations.get(extraArgs);
            WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s",
                                                      1, // just one output file
                                                      Arrays.asList(expectedMD5));
            executeTest("testDepthOfCoverage1", spec);
        }
    }

    @Test
    public void testDepthOfCoverage454() {
        // Same walker against 454 data on a tiny interval, DETAILED style.
        WalkerTestSpec spec = new WalkerTestSpec(
                "-T DepthOfCoverage -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam -L 1:10,001,890-10,001,895 -s DETAILED -o %s",
                1, // just one output file
                Arrays.asList("5c94916ec193ca825c681dd0175c6164"));
        executeTest("testDepthOfCoverage454", spec);
    }
}

View File

@ -49,18 +49,19 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
String md5 = entry.getValue();
String paramsFile = paramsFiles.get(bam);
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
WalkerTestSpec spec = new WalkerTestSpec(
"-R /broad/1KG/reference/human_b36_both.fasta" +
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
" -T TableRecalibration" +
" -I " + bam +
" -L 1:10,000,000-20,000,000" +
" --outputBam %s" +
" --params " + paramsFile,
1, // just one output file
Arrays.asList(md5));
executeTest("testTableRecalibrator1", spec);
if ( paramsFile != null ) {
WalkerTestSpec spec = new WalkerTestSpec(
"-R /broad/1KG/reference/human_b36_both.fasta" +
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
" -T TableRecalibration" +
" -I " + bam +
" -L 1:10,000,000-20,000,000" +
" --outputBam %s" +
" --params " + paramsFile,
1, // just one output file
Arrays.asList(md5));
executeTest("testTableRecalibrator1", spec);
}
}
}
}