More improvements to the duplicate quality combiner, making progress towards a clean system
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@788 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
04e51c8d1d
commit
30c63daf89
|
|
@ -176,12 +176,12 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
// Send each unique read to the map function
|
// Send each unique read to the map function
|
||||||
for ( SAMRecord unique : uniqueReads ) {
|
for ( SAMRecord unique : uniqueReads ) {
|
||||||
List<SAMRecord> l = Arrays.asList(unique);
|
List<SAMRecord> l = Arrays.asList(unique);
|
||||||
sum = mapOne(dupWalker, l, site, refBases, locus, sum);
|
sum = mapOne(dupWalker, uniqueReads, l, site, refBases, locus, sum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( duplicateReads.size() > 0 )
|
if ( duplicateReads.size() > 0 )
|
||||||
sum = mapOne(dupWalker, duplicateReads, site, refBases, locus, sum);
|
sum = mapOne(dupWalker, uniqueReads, duplicateReads, site, refBases, locus, sum);
|
||||||
|
|
||||||
printProgress("dups", site);
|
printProgress("dups", site);
|
||||||
|
|
||||||
|
|
@ -231,14 +231,15 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
public <M, T> T mapOne(DuplicateWalker<M, T> dupWalker,
|
public <M, T> T mapOne(DuplicateWalker<M, T> dupWalker,
|
||||||
List<SAMRecord> readSet,
|
List<SAMRecord> uniqueReads,
|
||||||
|
List<SAMRecord> duplicateReads,
|
||||||
GenomeLoc site,
|
GenomeLoc site,
|
||||||
byte[] refBases,
|
byte[] refBases,
|
||||||
LocusContext locus,
|
LocusContext locus,
|
||||||
T sum) {
|
T sum) {
|
||||||
final boolean keepMeP = dupWalker.filter(site, refBases, locus, readSet);
|
final boolean keepMeP = dupWalker.filter(site, refBases, locus, uniqueReads, duplicateReads);
|
||||||
if (keepMeP) {
|
if (keepMeP) {
|
||||||
M x = dupWalker.map(site, refBases, locus, readSet);
|
M x = dupWalker.map(site, refBases, locus, uniqueReads, duplicateReads);
|
||||||
sum = dupWalker.reduce(x, sum);
|
sum = dupWalker.reduce(x, sum);
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
|
|
||||||
|
|
@ -17,20 +17,23 @@ import net.sf.samtools.SAMRecord;
|
||||||
@Requires({DataSource.READS,DataSource.REFERENCE})
|
@Requires({DataSource.READS,DataSource.REFERENCE})
|
||||||
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads) {
|
public boolean filter(GenomeLoc loc, byte[] refBases, LocusContext context,
|
||||||
|
List<SAMRecord> uniqueReads,
|
||||||
|
List<SAMRecord> duplicateReads) {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called by the traversal engine to decide whether to send non-duplicates as lists of
|
* Called by the traversal engine to decide whether to send non-duplicates as lists of
|
||||||
* singleton reads to the map function. By default it's false.
|
* singleton reads to the map function. By default it's false.
|
||||||
*
|
*
|
||||||
* @return true if you want to see non duplicates during the traversal
|
* @return true if you want to see non duplicates during the traversal
|
||||||
*/
|
*/
|
||||||
public boolean mapUniqueReadsTooP() { return false; }
|
public boolean mapUniqueReadsTooP() { return false; }
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
public abstract MapType map(GenomeLoc loc, byte[] refBases, LocusContext context,
|
||||||
public abstract MapType map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads);
|
List<SAMRecord> uniqueReads,
|
||||||
|
List<SAMRecord> duplicateReads);
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
public abstract ReduceType reduceInit();
|
public abstract ReduceType reduceInit();
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,133 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import net.sf.samtools.*;
|
||||||
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ReadErrorRateWalker assesses the error rate per read position ('cycle') by comparing the
|
||||||
|
* read to its home on the reference and noting the mismatch rate. It ignores reads with
|
||||||
|
* indels in them, treats high and low-quality references bases the same, and does not count
|
||||||
|
* ambiguous bases as mismatches. It's also thread-safe, so you can process a slew of reads
|
||||||
|
* in short order.
|
||||||
|
*
|
||||||
|
* @author Kiran Garimella
|
||||||
|
*/
|
||||||
|
public class ReplaceQuals extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
|
@Argument(shortName="inputQualsBAM",doc="BAM files containing qualities to be replaced",required=true)
|
||||||
|
public String inputQualsBAM;
|
||||||
|
|
||||||
|
@Argument(shortName="outputBAM", required=false, doc="output BAM file for reads with replaced quals")
|
||||||
|
public String outputFilename = null;
|
||||||
|
|
||||||
|
public int MAX_READS_TO_LOAD = -1;
|
||||||
|
|
||||||
|
private HashMap<String, Pair<SAMRecord, SAMRecord>> readNameToPairs;
|
||||||
|
private int READ_PRINT_MOD = 100000;
|
||||||
|
private final boolean DEBUG = false;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
readNameToPairs = new HashMap<String, Pair<SAMRecord, SAMRecord>>();
|
||||||
|
|
||||||
|
SAMFileReader samReader = new SAMFileReader(new File(inputQualsBAM));
|
||||||
|
samReader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||||
|
|
||||||
|
int nReads = 0;
|
||||||
|
logger.info("Starting to read inputQualsBAM = " + inputQualsBAM);
|
||||||
|
for ( SAMRecord read : samReader ) {
|
||||||
|
//System.out.printf("READ is %s%n", read.format());
|
||||||
|
|
||||||
|
final String name = read.getReadName();
|
||||||
|
Pair<SAMRecord, SAMRecord> binding = readNameToPairs.containsKey(name) ? readNameToPairs.get(name) : new Pair<SAMRecord, SAMRecord>(null, null);
|
||||||
|
if ( read.getFirstOfPairFlag() ) {
|
||||||
|
binding.first = read;
|
||||||
|
} else {
|
||||||
|
binding.second = read;
|
||||||
|
}
|
||||||
|
readNameToPairs.put(name, binding);
|
||||||
|
|
||||||
|
if ( ++nReads % READ_PRINT_MOD == 0 ) {
|
||||||
|
logger.info(String.format(" Read %d reads so far...", nReads));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( nReads > MAX_READS_TO_LOAD && MAX_READS_TO_LOAD != -1 )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("Done reading input BAM");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public SAMRecord map(char[] ref, SAMRecord read) {
|
||||||
|
final String name = read.getReadName();
|
||||||
|
|
||||||
|
if ( readNameToPairs.containsKey(name) ) {
|
||||||
|
Pair<SAMRecord, SAMRecord> binding = readNameToPairs.get(name);
|
||||||
|
SAMRecord qRead = read.getFirstOfPairFlag() ? binding.first : binding.second;
|
||||||
|
if (qRead != null) {
|
||||||
|
|
||||||
|
if ( DEBUG ) {
|
||||||
|
System.out.printf("Replacing read %s quals with %s%n", read.getReadName(), qRead.getReadName());
|
||||||
|
System.out.printf("%s%n", read.getReadName());
|
||||||
|
System.out.printf("%s%n", qRead.getReadName());
|
||||||
|
System.out.printf("%s%n", read.getReadString());
|
||||||
|
System.out.printf("%s%n", qRead.getReadString());
|
||||||
|
System.out.printf("%s%n", read.getBaseQualityString());
|
||||||
|
System.out.printf("%s%n", qRead.getBaseQualityString());
|
||||||
|
|
||||||
|
//if (! read.getReadString().equals(qRead.getReadString()))
|
||||||
|
// throw new RuntimeException(String.format("BUG: equating %s and %s but bases are different", read.getReadName(), qRead.getReadName()));
|
||||||
|
}
|
||||||
|
|
||||||
|
read.setBaseQualities(qRead.getBaseQualities());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return read;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------------------------
|
||||||
|
// Standard i/o reduce
|
||||||
|
//
|
||||||
|
public void onTraversalDone(SAMFileWriter output) {
|
||||||
|
if ( output != null ) {
|
||||||
|
output.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMFileWriter reduceInit() {
|
||||||
|
if ( outputFilename != null ) { // ! outputBamFile.equals("") ) {
|
||||||
|
SAMFileWriterFactory fact = new SAMFileWriterFactory();
|
||||||
|
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||||
|
return fact.makeBAMWriter(header, true, new File(outputFilename));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
|
||||||
|
if ( output != null ) {
|
||||||
|
output.addAlignment(read);
|
||||||
|
} else {
|
||||||
|
out.println(read.format());
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -48,9 +48,9 @@ public class CombineDuplicatesWalker extends DuplicateWalker<SAMRecord, SAMFileW
|
||||||
}
|
}
|
||||||
|
|
||||||
public SAMFileWriter reduceInit() {
|
public SAMFileWriter reduceInit() {
|
||||||
if ( outputFilename != null ) { // ! outputBamFile.equals("") ) {
|
if ( outputFilename != null ) {
|
||||||
SAMFileWriterFactory fact = new SAMFileWriterFactory();
|
SAMFileWriterFactory fact = new SAMFileWriterFactory();
|
||||||
SAMFileHeader header = this.getToolkit().getSamReader().getFileHeader();
|
SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader();
|
||||||
return fact.makeBAMWriter(header, true, new File(outputFilename));
|
return fact.makeBAMWriter(header, true, new File(outputFilename));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -83,8 +83,11 @@ public class CombineDuplicatesWalker extends DuplicateWalker<SAMRecord, SAMFileW
|
||||||
* @param duplicateReads
|
* @param duplicateReads
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public SAMRecord map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads) {
|
public SAMRecord map(GenomeLoc loc, byte[] refBases, LocusContext context,
|
||||||
//logger.info(String.format("%s has %d duplicates%n", loc, duplicateReads.size()));
|
List<SAMRecord> uniqueReads,
|
||||||
|
List<SAMRecord> duplicateReads) {
|
||||||
|
//logger.info(String.format("%s has %d duplicates and %d non-duplicates", loc, duplicateReads.size(), uniqueReads.size()));
|
||||||
|
|
||||||
SAMRecord combinedRead = null;
|
SAMRecord combinedRead = null;
|
||||||
|
|
||||||
if ( duplicateReads.size() == 1 && ! duplicateReads.get(0).getDuplicateReadFlag() ) {
|
if ( duplicateReads.size() == 1 && ! duplicateReads.get(0).getDuplicateReadFlag() ) {
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.duplicates.DuplicateComp;
|
import org.broadinstitute.sting.utils.duplicates.DuplicateComp;
|
||||||
import org.broadinstitute.sting.utils.duplicates.DupUtils;
|
import org.broadinstitute.sting.utils.duplicates.DupUtils;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
@ -58,51 +59,63 @@ class QualityTracker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void inc(int b1Q, int b2Q, boolean mismatchP) {
|
public void inc(int b1Qi, int b2Qi, boolean mismatchP, boolean orderDependent) {
|
||||||
|
int b1Q = orderDependent ? b1Qi : Math.max(b1Qi, b2Qi);
|
||||||
|
int b2Q = orderDependent ? b2Qi : Math.min(b1Qi, b2Qi);
|
||||||
|
|
||||||
if ( b1Q > MAX_QUAL_SCORE ) throw new RuntimeException("Unexpectedly large base quality " + b1Q);
|
if ( b1Q > MAX_QUAL_SCORE ) throw new RuntimeException("Unexpectedly large base quality " + b1Q);
|
||||||
if ( b2Q > MAX_QUAL_SCORE ) throw new RuntimeException("Unexpectedly large base quality " + b2Q);
|
if ( b2Q > MAX_QUAL_SCORE ) throw new RuntimeException("Unexpectedly large base quality " + b2Q);
|
||||||
|
|
||||||
mismatchesByQ[b1Q][b2Q].inc(mismatchP);
|
mismatchesByQ[b1Q][b2Q].inc(mismatchP);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void inc(DuplicateComp dc) {
|
public void inc(DuplicateComp dc, boolean orderDependent) {
|
||||||
inc(dc.getQLarger(), dc.getQSmaller(), dc.isMismatchP());
|
inc(dc.getQLarger(), dc.getQSmaller(), dc.isMismatchP(), orderDependent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int probMismatchQ1Q2(int q1, int q2) {
|
||||||
|
double e1 = 1 - QualityUtils.qualToProb(q1);
|
||||||
|
double e2 = 1 - QualityUtils.qualToProb(q2);
|
||||||
|
double eMM = e1 * (1 - e2) + (1 - e1) * e2 - 1/3 * e1 * e2;
|
||||||
|
return QualityUtils.probToQual(1 - eMM, 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
public void printToStream(PrintStream out, boolean filterUnobserved) {
|
public void printToStream(PrintStream out, boolean filterUnobserved) {
|
||||||
out.printf("Q1\tQ2\t%s%n", mismatchesByQ[0][0].headerString());
|
out.printf("Q1\tQ2\tQmin\t%s%n", mismatchesByQ[0][0].headerString());
|
||||||
for ( int i = 0; i < MAX_QUAL_SCORE; i++ ) {
|
for ( int i = 0; i < MAX_QUAL_SCORE; i++ ) {
|
||||||
for ( int j = 0; j < MAX_QUAL_SCORE; j++ ) {
|
for ( int j = 0; j < MAX_QUAL_SCORE; j++ ) {
|
||||||
MismatchCounter mc = mismatchesByQ[i][j];
|
MismatchCounter mc = mismatchesByQ[i][j];
|
||||||
//System.out.printf("MC = %s%n", mc);
|
//System.out.printf("MC = %s%n", mc);
|
||||||
if ( filterUnobserved && mc.nObs == 0 )
|
if ( filterUnobserved && mc.nObs == 0 )
|
||||||
continue;
|
continue;
|
||||||
out.printf("%d\t%d\t%s\t%n", i, j, mc.toString());
|
out.printf("%d\t%d\t%d\t%s\t%n", i, j, probMismatchQ1Q2(i,j), mc.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 22, 2009
|
|
||||||
* Time: 2:52:28 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, QualityTracker> {
|
public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, QualityTracker> {
|
||||||
@Argument(fullName="filterUnobservedQuals", shortName="filterUnobservedQuals", required=false, doc="Show only quality bins with at least one observation in the data")
|
@Argument(fullName="filterUnobservedQuals", required=false, doc="Show only quality bins with at least one observation in the data")
|
||||||
public boolean FILTER_UNOBSERVED_QUALS = false;
|
public boolean FILTER_UNOBSERVED_QUALS = false;
|
||||||
|
|
||||||
@Argument(fullName="maxPairwiseCompsPerDupSet", shortName="maxPairwiseCompsPerDupSet", required=false, doc="Maximumize number of pairwise comparisons to perform among duplicate read sets")
|
@Argument(fullName="maxPairwiseCompsPerDupSet", required=false, doc="Maximumize number of pairwise comparisons to perform among duplicate read sets")
|
||||||
public int MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET = 100;
|
public int MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET = 100;
|
||||||
|
|
||||||
@Argument(fullName="combinedQuals", shortName="combinedQuals", required=false, doc="Combine and assess pairwise base qualities")
|
@Argument(fullName="combinedQuals", required=false, doc="Combine and assess pairwise base qualities")
|
||||||
public boolean COMBINE_QUALS = false;
|
public boolean COMBINE_QUALS = false;
|
||||||
|
|
||||||
@Argument(fullName="combineAllDups", shortName="combineAllDups", required=false, doc="Combine and assess pairwise base qualities")
|
@Argument(fullName="combineAllDups", required=false, doc="Combine and assess pairwise base qualities")
|
||||||
public boolean COMBINE_ALL_DUPS = false;
|
public boolean COMBINE_ALL_DUPS = false;
|
||||||
|
|
||||||
|
@Argument(fullName="orderDependent", required=false, doc="")
|
||||||
|
public boolean orderDependent = false;
|
||||||
|
|
||||||
|
@Argument(fullName="compareToUniqueReads", required=false, doc="If true, then we will compare only to unique (i.e., non-duplicated molecules) at the same duplicate site")
|
||||||
|
public boolean compareToUniqueReads = false;
|
||||||
|
|
||||||
|
@Argument(fullName="comparePairToSingleton", required=false, doc="If true, then we will compare a combined dup to a random other read in the duplicate set, not a combined pair itself")
|
||||||
|
public boolean comparePairToSingleton = false;
|
||||||
|
|
||||||
final boolean DEBUG = false;
|
final boolean DEBUG = false;
|
||||||
final private boolean ACTUALLY_DO_WORK = true;
|
final private boolean ACTUALLY_DO_WORK = true;
|
||||||
|
|
||||||
|
|
@ -116,15 +129,17 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
|
|
||||||
public QualityTracker reduce(List<DuplicateComp> dupComps, QualityTracker tracker) {
|
public QualityTracker reduce(List<DuplicateComp> dupComps, QualityTracker tracker) {
|
||||||
for ( DuplicateComp dc : dupComps ) {
|
for ( DuplicateComp dc : dupComps ) {
|
||||||
tracker.inc(dc);
|
tracker.inc(dc, orderDependent);
|
||||||
}
|
}
|
||||||
|
|
||||||
return tracker;
|
return tracker;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print out data for regression
|
// Print out data for regression
|
||||||
public List<DuplicateComp> map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads) {
|
public List<DuplicateComp> map(GenomeLoc loc, byte[] refBases, LocusContext context,
|
||||||
//out.printf("%s has %d duplicates%n", loc, duplicateReads.size());
|
List<SAMRecord> uniqueReads,
|
||||||
|
List<SAMRecord> duplicateReads) {
|
||||||
|
//logger.info(String.format("%s has %d duplicates and %d non-duplicates", loc, duplicateReads.size(), uniqueReads.size()));
|
||||||
List<DuplicateComp> pairwiseComps = new ArrayList<DuplicateComp>();
|
List<DuplicateComp> pairwiseComps = new ArrayList<DuplicateComp>();
|
||||||
|
|
||||||
if ( ! ACTUALLY_DO_WORK )
|
if ( ! ACTUALLY_DO_WORK )
|
||||||
|
|
@ -135,15 +150,21 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
if ( combinedReads != null ) {
|
if ( combinedReads != null ) {
|
||||||
SAMRecord combined1 = combinedReads.first;
|
SAMRecord combined1 = combinedReads.first;
|
||||||
SAMRecord combined2 = combinedReads.second;
|
SAMRecord combined2 = combinedReads.second;
|
||||||
addPairwiseMatches( pairwiseComps, combined1, combined2 );
|
|
||||||
|
if ( comparePairToSingleton )
|
||||||
|
pairwiseComps = addPairwiseMatches( pairwiseComps, combined1, duplicateReads.get(2), uniqueReads );
|
||||||
|
else
|
||||||
|
pairwiseComps = addPairwiseMatches( pairwiseComps, combined1, combined2, uniqueReads );
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int nComparisons = 0;
|
int nComparisons = 0;
|
||||||
for ( SAMRecord read1 : duplicateReads ) {
|
for ( SAMRecord read1 : duplicateReads ) {
|
||||||
for ( SAMRecord read2 : duplicateReads ) {
|
for ( SAMRecord read2 : duplicateReads ) {
|
||||||
if ( DupUtils.usableDuplicate(read1, read2) ) {
|
if ( read1.hashCode() < read2.hashCode() && DupUtils.usableDuplicate(read1, read2) ) {
|
||||||
|
// the hashcode insures we don't do A vs. B and B vs. A
|
||||||
|
//System.out.printf("Comparing %s against %s%n", read1, read2);
|
||||||
nComparisons++;
|
nComparisons++;
|
||||||
addPairwiseMatches( pairwiseComps, read1, read2 );
|
pairwiseComps = addPairwiseMatches( pairwiseComps, read1, read2, uniqueReads );
|
||||||
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
|
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -155,7 +176,32 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<DuplicateComp> addPairwiseMatches(List<DuplicateComp> comps,
|
private List<DuplicateComp> addPairwiseMatches(List<DuplicateComp> comps,
|
||||||
SAMRecord read1, SAMRecord read2 ) {
|
SAMRecord read1, SAMRecord read2,
|
||||||
|
List<SAMRecord> uniqueReads ) {
|
||||||
|
if ( compareToUniqueReads ) {
|
||||||
|
// we want to compare to a read in the unique read set
|
||||||
|
if ( uniqueReads.size() > 0 ) { // there's actually something to compare to
|
||||||
|
SAMRecord uniqueRead = uniqueReads.get(0); // might as well get the first one
|
||||||
|
return pairwiseMatches(comps, read1, uniqueRead);
|
||||||
|
} else {
|
||||||
|
return comps;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// default, just do read1 vs. read2
|
||||||
|
return pairwiseMatches(comps, read1, read2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the pairwise mismatches between reads read1 and read2 and adds the result to the comps list.
|
||||||
|
* Doesn't contain any logic deciding what to compare, just does read1 and read2
|
||||||
|
*
|
||||||
|
* @param comps
|
||||||
|
* @param read1
|
||||||
|
* @param read2
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private List<DuplicateComp> pairwiseMatches(List<DuplicateComp> comps, SAMRecord read1, SAMRecord read2 ) {
|
||||||
byte[] read1Bases = read1.getReadBases();
|
byte[] read1Bases = read1.getReadBases();
|
||||||
byte[] read1Quals = read1.getBaseQualities();
|
byte[] read1Quals = read1.getBaseQualities();
|
||||||
byte[] read2Bases = read2.getReadBases();
|
byte[] read2Bases = read2.getReadBases();
|
||||||
|
|
@ -164,7 +210,7 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
for ( int i = 0; i < read1Bases.length; i++) {
|
for ( int i = 0; i < read1Bases.length; i++) {
|
||||||
byte qual1 = read1Quals[i];
|
byte qual1 = read1Quals[i];
|
||||||
byte qual2 = read2Quals[i];
|
byte qual2 = read2Quals[i];
|
||||||
boolean mismatchP = read1Bases[i] != read2Bases[i];
|
boolean mismatchP = ! BaseUtils.basesAreEqual(read1Bases[i], read2Bases[i]);
|
||||||
DuplicateComp dc = new DuplicateComp(qual1, qual2, mismatchP);
|
DuplicateComp dc = new DuplicateComp(qual1, qual2, mismatchP);
|
||||||
comps.add(dc);
|
comps.add(dc);
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue