diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index ac69738d3..f029b768e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -30,11 +30,13 @@ import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; import java.io.File; import java.util.Collection; +import java.util.Random; import java.util.Set; import java.util.TreeSet; @@ -70,12 +72,21 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; * -I input2.bam \ * --read_filter MappingQualityZero * + * // Prints the first 2000 reads in the BAM file * java -Xmx2g -jar GenomeAnalysisTK.jar \ * -R ref.fasta \ * -T PrintReads \ * -o output.bam \ * -I input.bam \ * -n 2000 + * + * // Downsamples BAM file to 25% + * java -Xmx2g -jar GenomeAnalysisTK.jar \ + * -R ref.fasta \ + * -T PrintReads \ + * -o output.bam \ + * -I input.bam \ + * -ds 0.25 * * */ @@ -95,9 +106,18 @@ public class PrintReadsWalker extends ReadWalker { @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false) String platform = null; + /** + * Only prints the first n reads of the file + */ @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) int nReadsToPrint = -1; + /** + * Downsamples the bam file by the given ratio, printing only approximately the given percentage of reads. The downsampling is balanced (over the entire coverage) + */ + @Argument(fullName = "downsample_coverage", shortName = "ds", doc="Downsample BAM to desired coverage", required = false) + public double downsampleRatio = 1.0; + /** * Only reads from samples listed in the provided file(s) will be included in the output. */ @@ -112,6 +132,8 @@ public class PrintReadsWalker extends ReadWalker { private TreeSet samplesToChoose = new TreeSet(); private boolean SAMPLES_SPECIFIED = false; + + Random random; /** * The initialize function. @@ -132,13 +154,15 @@ public class PrintReadsWalker extends ReadWalker { if(!samplesToChoose.isEmpty()) { SAMPLES_SPECIFIED = true; } + + random = GenomeAnalysisEngine.getRandomGenerator(); } /** * The reads filter function. * - * @param ref the reference bases that correspond to our read, if a reference was provided + * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return true if the read passes the filter, false if it doesn't */ @@ -188,12 +212,13 @@ public class PrintReadsWalker extends ReadWalker { * @return the read itself */ public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { - return read; + return (random.nextDouble() < downsampleRatio) ? read : null; } /** * reduceInit is called once before any calls to the map function. We use it here to setup the output * bam file, if it was specified on the command line + * * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise */ public SAMFileWriter reduceInit() { @@ -202,12 +227,14 @@ public class PrintReadsWalker extends ReadWalker { /** * given a read and a output location, reduce by emitting the read - * @param read the read itself + * + * @param read the read itself * @param output the output source * @return the SAMFileWriter, so that the next reduce can emit to the same source */ public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) { - output.addAlignment(read); + if (read != null) + output.addAlignment(read); return output; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java index 8cd10048a..484641981 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java @@ -60,19 +60,23 @@ public class PrintReadsWalkerUnitTest extends BaseTest { private ReferenceContext bases = null; //private ReferenceContext ref = new ReferenceContext() + PrintReadsWalker walker; + ArtificialSAMFileWriter writer; + @BeforeMethod public void before() { trav = new ArtificialReadsTraversal(); readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads; + + walker = new PrintReadsWalker(); + writer = new ArtificialSAMFileWriter(); + walker.out = writer; + walker.initialize(); } /** test that we get out the same number of reads we put in */ @Test public void testReadCount() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - trav.traverse(walker, null, writer); assertEquals(writer.getRecords().size(), readTotal); } @@ -80,10 +84,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest { /** test that we're ok with a null read */ @Test public void testNullRead() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - SAMRecord rec = walker.map(bases, null, null); assertTrue(rec == null); } @@ -91,10 +91,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest { /** tes that we get the read we put into the map function */ @Test public void testReturnRead() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000); GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); SAMRecord ret = walker.map(bases, rec, null); @@ -102,20 +98,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest { assertTrue(ret.getReadName().equals(rec.getReadName())); } - /** test that the read makes it to the output source */ - @Test - public void testReducingRead() { - PrintReadsWalker walker = new PrintReadsWalker(); - ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - walker.out = writer; - - SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000); - SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); - SAMRecord ret = walker.map(bases, null,null); - walker.reduce(ret,writer); - - assertTrue(writer.getRecords().size() == 1); - } }