Better location for the downsampling of reads in PrintReads

* Using filter() instead of map() makes for a cleaner walker.
* Renamed the unit tests so their names make more sense alongside the other unit and integration tests.
This commit is contained in:
Mauricio Carneiro 2012-01-14 12:13:15 -05:00
parent 3a9d9789ae
commit cec7107762
2 changed files with 16 additions and 18 deletions

View File

@ -31,8 +31,11 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File; import java.io.File;
import java.util.Collection; import java.util.Collection;
@ -40,10 +43,6 @@ import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file. * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
* *
@ -201,7 +200,8 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
nReadsToPrint--; // n > 0 means there are still reads to be printed. nReadsToPrint--; // n > 0 means there are still reads to be printed.
} }
return true; // if downsample option is turned off (= 1) then don't waste time getting the next random number.
return (downsampleRatio == 1 || random.nextDouble() < downsampleRatio);
} }
/** /**
@ -212,7 +212,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @return the read itself * @return the read itself
*/ */
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return (random.nextDouble() < downsampleRatio) ? read : null; return read;
} }
/** /**
@ -233,7 +233,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @return the SAMFileWriter, so that the next reduce can emit to the same source * @return the SAMFileWriter, so that the next reduce can emit to the same source
*/ */
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) { public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
if (read != null)
output.addAlignment(read); output.addAlignment(read);
return output; return output;
} }

View File

@ -1,20 +1,19 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal; import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter; import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -44,11 +43,11 @@ import org.testng.annotations.Test;
/** /**
* @author aaron * @author aaron
* <p/> * <p/>
* Class PrintReadsWalkerUnitTest * Class PrintReadsUnitTest
* <p/> * <p/>
* This tests the print reads walker, using the artificial reads traversal * This tests the print reads walker, using the artificial reads traversal
*/ */
public class PrintReadsWalkerUnitTest extends BaseTest { public class PrintReadsUnitTest extends BaseTest {
/** /**
* our private fake reads traversal. This traversal seeds the * our private fake reads traversal. This traversal seeds the