/*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
|
|
2009-12-02 05:19:02 +08:00
|
|
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
2009-04-20 22:07:14 +08:00
|
|
|
|
2010-03-03 23:56:44 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
2009-04-20 22:07:14 +08:00
|
|
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
2010-04-20 07:00:08 +08:00
|
|
|
import org.broadinstitute.sting.commandline.Argument;
|
|
|
|
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
2009-04-20 22:07:14 +08:00
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
import net.sf.samtools.SAMFileWriter;
|
|
|
|
|
import net.sf.samtools.SAMFileHeader;
|
|
|
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
|
|
|
|
|
* read to its home on the reference and noting the mismatch rate. It ignores reads with
|
|
|
|
|
* indels in them, treats high and low-quality references bases the same, and does not count
|
|
|
|
|
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
|
|
|
|
|
* in short order.
|
|
|
|
|
*
|
|
|
|
|
* @author Kiran Garimella
|
|
|
|
|
*/
|
|
|
|
|
public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWriter>> {
|
2009-05-07 09:22:01 +08:00
|
|
|
@Argument(shortName="nWaysOut",doc="n ways out",required=false)
|
|
|
|
|
public int nWaysOut = 1;
|
2009-04-20 22:07:14 +08:00
|
|
|
|
2009-05-07 09:22:01 +08:00
|
|
|
@Argument(shortName="readScaling",doc="read scaling",required=false)
|
|
|
|
|
public float readScaling = 1;
|
2009-04-20 22:07:14 +08:00
|
|
|
|
2009-05-06 06:43:40 +08:00
|
|
|
@Argument(shortName="outputBase",doc="output base",required=true)
|
2009-04-20 22:07:14 +08:00
|
|
|
public String outputBase;
|
|
|
|
|
|
2009-07-30 00:11:45 +08:00
|
|
|
@Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false)
|
|
|
|
|
public Integer BAMcompression = 5;
|
|
|
|
|
|
2009-04-20 22:07:14 +08:00
|
|
|
public long nReadsRead = 0;
|
|
|
|
|
public long nReadsWritten = 0;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
*
|
|
|
|
|
*/
|
2010-03-03 23:56:44 +08:00
|
|
|
public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
2009-04-20 22:07:14 +08:00
|
|
|
nReadsRead++;
|
|
|
|
|
return read;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
public ArrayList<SAMFileWriter> reduceInit() {
|
|
|
|
|
ArrayList<SAMFileWriter> outputs = new ArrayList<SAMFileWriter>(nWaysOut);
|
|
|
|
|
for ( int i = 0; i < nWaysOut; i++ ) {
|
2009-07-30 07:00:15 +08:00
|
|
|
SAMFileHeader header = this.getToolkit().getSAMFileHeader();
|
2010-04-20 07:00:08 +08:00
|
|
|
outputs.add(ReadUtils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", BAMcompression));
|
2009-04-20 22:07:14 +08:00
|
|
|
}
|
|
|
|
|
return outputs;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Summarize the error rate data.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
public ArrayList<SAMFileWriter> reduce(SAMRecord read, ArrayList<SAMFileWriter> outputs) {
|
|
|
|
|
for ( SAMFileWriter out : outputs ) {
|
|
|
|
|
if ( readScaling >= 1.0 ) {
|
|
|
|
|
int nCopies = (int)Math.ceil(readScaling);
|
|
|
|
|
for ( int i = 0; i < nCopies; i++) {
|
|
|
|
|
out.addAlignment(read);
|
|
|
|
|
nReadsWritten++;
|
|
|
|
|
}
|
|
|
|
|
} else if ( Math.random() < readScaling ) {
|
|
|
|
|
out.addAlignment(read);
|
|
|
|
|
nReadsWritten++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return outputs;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
public void onTraversalDone(ArrayList<SAMFileWriter> outputs) {
|
|
|
|
|
for ( SAMFileWriter out : outputs ) {
|
|
|
|
|
out.close();
|
|
|
|
|
}
|
|
|
|
|
System.out.printf("Reads: read %d written %d%n", nReadsRead, nReadsWritten);
|
|
|
|
|
}
|
|
|
|
|
}
|