Added "unbiased" downsampling parameter to PrintReads
* also cleaned up and updated part of the unit tests for print reads. Needs a more thorough cleaning.
This commit is contained in:
parent
2c3176eb80
commit
28aa353501
|
|
@ -30,11 +30,13 @@ import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
|
@ -70,12 +72,21 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
* -I input2.bam \
|
* -I input2.bam \
|
||||||
* --read_filter MappingQualityZero
|
* --read_filter MappingQualityZero
|
||||||
*
|
*
|
||||||
|
* // Prints the first 2000 reads in the BAM file
|
||||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
* -R ref.fasta \
|
* -R ref.fasta \
|
||||||
* -T PrintReads \
|
* -T PrintReads \
|
||||||
* -o output.bam \
|
* -o output.bam \
|
||||||
* -I input.bam \
|
* -I input.bam \
|
||||||
* -n 2000
|
* -n 2000
|
||||||
|
*
|
||||||
|
* // Downsamples BAM file to 25%
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T PrintReads \
|
||||||
|
* -o output.bam \
|
||||||
|
* -I input.bam \
|
||||||
|
* -ds 0.25
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
@ -95,9 +106,18 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
|
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
|
||||||
String platform = null;
|
String platform = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only prints the first n reads of the file
|
||||||
|
*/
|
||||||
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
||||||
int nReadsToPrint = -1;
|
int nReadsToPrint = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Downsamples the bam file by the given ratio, printing only approximately the given percentage of reads. The downsampling is balanced (over the entire coverage)
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "downsample_coverage", shortName = "ds", doc="Downsample BAM to desired coverage", required = false)
|
||||||
|
public double downsampleRatio = 1.0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Only reads from samples listed in the provided file(s) will be included in the output.
|
* Only reads from samples listed in the provided file(s) will be included in the output.
|
||||||
*/
|
*/
|
||||||
|
|
@ -112,6 +132,8 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
|
|
||||||
private TreeSet<String> samplesToChoose = new TreeSet<String>();
|
private TreeSet<String> samplesToChoose = new TreeSet<String>();
|
||||||
private boolean SAMPLES_SPECIFIED = false;
|
private boolean SAMPLES_SPECIFIED = false;
|
||||||
|
|
||||||
|
Random random;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The initialize function.
|
* The initialize function.
|
||||||
|
|
@ -132,13 +154,15 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
if(!samplesToChoose.isEmpty()) {
|
if(!samplesToChoose.isEmpty()) {
|
||||||
SAMPLES_SPECIFIED = true;
|
SAMPLES_SPECIFIED = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
random = GenomeAnalysisEngine.getRandomGenerator();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The reads filter function.
|
* The reads filter function.
|
||||||
*
|
*
|
||||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||||
* @param read the read itself, as a SAMRecord
|
* @param read the read itself, as a SAMRecord
|
||||||
* @return true if the read passes the filter, false if it doesn't
|
* @return true if the read passes the filter, false if it doesn't
|
||||||
*/
|
*/
|
||||||
|
|
@ -188,12 +212,13 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
* @return the read itself
|
* @return the read itself
|
||||||
*/
|
*/
|
||||||
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||||
return read;
|
return (random.nextDouble() < downsampleRatio) ? read : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* reduceInit is called once before any calls to the map function. We use it here to setup the output
|
* reduceInit is called once before any calls to the map function. We use it here to setup the output
|
||||||
* bam file, if it was specified on the command line
|
* bam file, if it was specified on the command line
|
||||||
|
*
|
||||||
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
|
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
|
||||||
*/
|
*/
|
||||||
public SAMFileWriter reduceInit() {
|
public SAMFileWriter reduceInit() {
|
||||||
|
|
@ -202,12 +227,14 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* given a read and a output location, reduce by emitting the read
|
* given a read and a output location, reduce by emitting the read
|
||||||
* @param read the read itself
|
*
|
||||||
|
* @param read the read itself
|
||||||
* @param output the output source
|
* @param output the output source
|
||||||
* @return the SAMFileWriter, so that the next reduce can emit to the same source
|
* @return the SAMFileWriter, so that the next reduce can emit to the same source
|
||||||
*/
|
*/
|
||||||
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
|
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
|
||||||
output.addAlignment(read);
|
if (read != null)
|
||||||
|
output.addAlignment(read);
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,19 +60,23 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
||||||
private ReferenceContext bases = null;
|
private ReferenceContext bases = null;
|
||||||
//private ReferenceContext ref = new ReferenceContext()
|
//private ReferenceContext ref = new ReferenceContext()
|
||||||
|
|
||||||
|
PrintReadsWalker walker;
|
||||||
|
ArtificialSAMFileWriter writer;
|
||||||
|
|
||||||
@BeforeMethod
|
@BeforeMethod
|
||||||
public void before() {
|
public void before() {
|
||||||
trav = new ArtificialReadsTraversal();
|
trav = new ArtificialReadsTraversal();
|
||||||
readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads;
|
readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads;
|
||||||
|
|
||||||
|
walker = new PrintReadsWalker();
|
||||||
|
writer = new ArtificialSAMFileWriter();
|
||||||
|
walker.out = writer;
|
||||||
|
walker.initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** test that we get out the same number of reads we put in */
|
/** test that we get out the same number of reads we put in */
|
||||||
@Test
|
@Test
|
||||||
public void testReadCount() {
|
public void testReadCount() {
|
||||||
PrintReadsWalker walker = new PrintReadsWalker();
|
|
||||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
|
||||||
walker.out = writer;
|
|
||||||
|
|
||||||
trav.traverse(walker, null, writer);
|
trav.traverse(walker, null, writer);
|
||||||
assertEquals(writer.getRecords().size(), readTotal);
|
assertEquals(writer.getRecords().size(), readTotal);
|
||||||
}
|
}
|
||||||
|
|
@ -80,10 +84,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
||||||
/** test that we're ok with a null read */
|
/** test that we're ok with a null read */
|
||||||
@Test
|
@Test
|
||||||
public void testNullRead() {
|
public void testNullRead() {
|
||||||
PrintReadsWalker walker = new PrintReadsWalker();
|
|
||||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
|
||||||
walker.out = writer;
|
|
||||||
|
|
||||||
SAMRecord rec = walker.map(bases, null, null);
|
SAMRecord rec = walker.map(bases, null, null);
|
||||||
assertTrue(rec == null);
|
assertTrue(rec == null);
|
||||||
}
|
}
|
||||||
|
|
@ -91,10 +91,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
||||||
/** tes that we get the read we put into the map function */
|
/** tes that we get the read we put into the map function */
|
||||||
@Test
|
@Test
|
||||||
public void testReturnRead() {
|
public void testReturnRead() {
|
||||||
PrintReadsWalker walker = new PrintReadsWalker();
|
|
||||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
|
||||||
walker.out = writer;
|
|
||||||
|
|
||||||
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
|
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
|
||||||
GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
|
GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
|
||||||
SAMRecord ret = walker.map(bases, rec, null);
|
SAMRecord ret = walker.map(bases, rec, null);
|
||||||
|
|
@ -102,20 +98,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
||||||
assertTrue(ret.getReadName().equals(rec.getReadName()));
|
assertTrue(ret.getReadName().equals(rec.getReadName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** test that the read makes it to the output source */
|
|
||||||
@Test
|
|
||||||
public void testReducingRead() {
|
|
||||||
PrintReadsWalker walker = new PrintReadsWalker();
|
|
||||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
|
||||||
walker.out = writer;
|
|
||||||
|
|
||||||
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
|
|
||||||
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
|
|
||||||
SAMRecord ret = walker.map(bases, null,null);
|
|
||||||
walker.reduce(ret,writer);
|
|
||||||
|
|
||||||
assertTrue(writer.getRecords().size() == 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue