System for traversing duplicate reads, along with a walker to compute quality scores among duplicates and a smarter method to combine quality scores across duplicates -- v1
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@624 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
71e8f47a6c
commit
2204be43eb
|
|
@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -89,8 +90,9 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<SAMRecord> collectDuplicates(List<SAMRecord> reads) {
|
private Pair<List<SAMRecord>, List<SAMRecord>> splitDuplicates(List<SAMRecord> reads) {
|
||||||
ArrayList<SAMRecord> dups = new ArrayList<SAMRecord>();
|
List<SAMRecord> uniques = new ArrayList<SAMRecord>();
|
||||||
|
List<SAMRecord> dups = new ArrayList<SAMRecord>();
|
||||||
|
|
||||||
// find the first duplicate
|
// find the first duplicate
|
||||||
SAMRecord key = null;
|
SAMRecord key = null;
|
||||||
|
|
@ -104,7 +106,7 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// At this point, there are two possibilities, we have found at least one dup or not
|
// At this point, there are two possibilities, we have found at least one dup or not
|
||||||
//System.out.printf("Key is %s%n", key);
|
// if it's a dup, add it to the dups list, otherwise add it to the uniques list
|
||||||
if ( key != null ) {
|
if ( key != null ) {
|
||||||
final GenomeLoc keyLoc = new GenomeLoc(key);
|
final GenomeLoc keyLoc = new GenomeLoc(key);
|
||||||
final GenomeLoc keyMateLoc = new GenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
|
final GenomeLoc keyMateLoc = new GenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
|
||||||
|
|
@ -117,11 +119,15 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
// we are at the same position as the dup and have the same mat pos, it's a dup
|
// we are at the same position as the dup and have the same mat pos, it's a dup
|
||||||
if (DEBUG) logger.debug(String.format(" => Adding read to dups list: %s%n", read));
|
if (DEBUG) logger.debug(String.format(" => Adding read to dups list: %s%n", read));
|
||||||
dups.add(read);
|
dups.add(read);
|
||||||
|
} else {
|
||||||
|
uniques.add(read);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
uniques = reads;
|
||||||
}
|
}
|
||||||
|
|
||||||
return dups;
|
return new Pair<List<SAMRecord>, List<SAMRecord>>(uniques, dups);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -146,9 +152,13 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
for (SAMRecord read: iter) {
|
for (SAMRecord read: iter) {
|
||||||
// get the genome loc from the read
|
// get the genome loc from the read
|
||||||
GenomeLoc site = new GenomeLoc(read);
|
GenomeLoc site = new GenomeLoc(read);
|
||||||
logger.debug(String.format("*** TraverseDuplicates.traverse at %s", site));
|
|
||||||
List<SAMRecord> reads = readsAtLoc(read, iter);
|
List<SAMRecord> reads = readsAtLoc(read, iter);
|
||||||
List<SAMRecord> duplicateReads = collectDuplicates(reads);
|
Pair<List<SAMRecord>, List<SAMRecord>> split = splitDuplicates(reads);
|
||||||
|
List<SAMRecord> uniqueReads = split.getFirst();
|
||||||
|
List<SAMRecord> duplicateReads = split.getSecond();
|
||||||
|
|
||||||
|
logger.debug(String.format("*** TraverseDuplicates.traverse at %s has %d unique and %d duplicate reads",
|
||||||
|
site, uniqueReads.size(), duplicateReads.size()));
|
||||||
|
|
||||||
// Jump forward in the reference to this locus location
|
// Jump forward in the reference to this locus location
|
||||||
LocusContext locus = new LocusContext(site, duplicateReads, Arrays.asList(0));
|
LocusContext locus = new LocusContext(site, duplicateReads, Arrays.asList(0));
|
||||||
|
|
@ -161,11 +171,16 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
|
|
||||||
byte[] refBases = new byte[0];
|
byte[] refBases = new byte[0];
|
||||||
|
|
||||||
final boolean keepMeP = dupWalker.filter(site, refBases, locus, duplicateReads);
|
if ( dupWalker.mapUniqueReadsTooP() ) {
|
||||||
if (keepMeP) {
|
// Send each unique read to the map function
|
||||||
M x = dupWalker.map(site, refBases, locus, duplicateReads);
|
for ( SAMRecord unique : uniqueReads ) {
|
||||||
sum = dupWalker.reduce(x, sum);
|
List<SAMRecord> l = Arrays.asList(unique);
|
||||||
|
sum = mapOne(dupWalker, l, site, refBases, locus, sum);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( duplicateReads.size() > 0 )
|
||||||
|
sum = mapOne(dupWalker, duplicateReads, site, refBases, locus, sum);
|
||||||
|
|
||||||
printProgress("dups", site);
|
printProgress("dups", site);
|
||||||
|
|
||||||
|
|
@ -214,6 +229,19 @@ public class TraverseDuplicates extends TraversalEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public <M, T> T mapOne(DuplicateWalker<M, T> dupWalker,
|
||||||
|
List<SAMRecord> readSet,
|
||||||
|
GenomeLoc site,
|
||||||
|
byte[] refBases,
|
||||||
|
LocusContext locus,
|
||||||
|
T sum) {
|
||||||
|
final boolean keepMeP = dupWalker.filter(site, refBases, locus, readSet);
|
||||||
|
if (keepMeP) {
|
||||||
|
M x = dupWalker.map(site, refBases, locus, readSet);
|
||||||
|
sum = dupWalker.reduce(x, sum);
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,15 @@ public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapTyp
|
||||||
public boolean requiresReads() { return true; }
|
public boolean requiresReads() { return true; }
|
||||||
public boolean cannotHandleReads() { return false; }
|
public boolean cannotHandleReads() { return false; }
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called by the traversal engine to decide whether to send non-duplicates as lists of
|
||||||
|
* singleton reads to the map function. By default it's false.
|
||||||
|
*
|
||||||
|
* @return true if you want to see non duplicates during the traversal
|
||||||
|
*/
|
||||||
|
public boolean mapUniqueReadsTooP() { return false; }
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
||||||
public abstract MapType map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads);
|
public abstract MapType map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.duplicates.DuplicateComp;
|
||||||
|
import org.broadinstitute.sting.utils.duplicates.DupUtils;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMFileWriter;
|
||||||
|
import net.sf.samtools.SAMFileWriterFactory;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mdepristo
|
||||||
|
* Date: Feb 22, 2009
|
||||||
|
* Time: 2:52:28 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class CombineDuplicatesWalker extends DuplicateWalker<SAMRecord, SAMFileWriter> {
|
||||||
|
@Argument(fullName="outputBAM", shortName="outputBAM", required=false, defaultValue="", doc="BAM File to write combined duplicates to")
|
||||||
|
public String outputFilename;
|
||||||
|
|
||||||
|
@Argument(fullName="includeUniqueReads", shortName="includeUniqueReads", required=false, defaultValue="true", doc="If true, also writes out non-duplicate reads in file")
|
||||||
|
public boolean INCLUDE_UNIQUE_READS;
|
||||||
|
|
||||||
|
@Argument(fullName="maxQ", shortName="maxQ", required=false, defaultValue="50",
|
||||||
|
doc="The maximum Q score allowed for combined reads, reflects the background error rate giving rise to perfect bases that don't correspond to the reference")
|
||||||
|
public int MAX_QUALITY_SCORE;
|
||||||
|
|
||||||
|
final boolean DEBUG = false;
|
||||||
|
|
||||||
|
public boolean mapUniqueReadsTooP() {
|
||||||
|
return INCLUDE_UNIQUE_READS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------------------------------
|
||||||
|
// Standard i/o reduce
|
||||||
|
//
|
||||||
|
public void onTraversalDone(SAMFileWriter output) {
|
||||||
|
if ( output != null ) {
|
||||||
|
output.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMFileWriter reduceInit() {
|
||||||
|
if ( outputFilename != null ) { // ! outputFile.equals("") ) {
|
||||||
|
SAMFileWriterFactory fact = new SAMFileWriterFactory();
|
||||||
|
SAMFileHeader header = this.getToolkit().getSamReader().getFileHeader();
|
||||||
|
return fact.makeBAMWriter(header, true, new File(outputFilename));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public SAMFileWriter reduce(SAMRecord read, SAMFileWriter output) {
|
||||||
|
if ( output != null ) {
|
||||||
|
output.addAlignment(read);
|
||||||
|
} else {
|
||||||
|
out.println(read.format());
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a combined read given the input list of non-unique reads. If there's just one read in the
|
||||||
|
* set, it's considered unique and returned. If there's more than one, the N-way combine
|
||||||
|
* duplicate function is invoked.
|
||||||
|
*
|
||||||
|
* @param loc
|
||||||
|
* @param refBases
|
||||||
|
* @param context
|
||||||
|
* @param duplicateReads
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public SAMRecord map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads) {
|
||||||
|
//logger.info(String.format("%s has %d duplicates%n", loc, duplicateReads.size()));
|
||||||
|
SAMRecord combinedRead = null;
|
||||||
|
|
||||||
|
if ( duplicateReads.size() == 1 ) {
|
||||||
|
// we are a unique read
|
||||||
|
combinedRead = duplicateReads.get(0);
|
||||||
|
} else {
|
||||||
|
// actually call the combine function
|
||||||
|
//for (SAMRecord read : duplicateReads ) {
|
||||||
|
// System.out.printf("Read %s%n", read.format());
|
||||||
|
//}
|
||||||
|
combinedRead = DupUtils.combineDuplicates(duplicateReads, MAX_QUALITY_SCORE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return combinedRead;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,20 +1,17 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.duplicates.DuplicateComp;
|
||||||
|
import org.broadinstitute.sting.utils.duplicates.DupUtils;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
@ -68,7 +65,7 @@ class QualityTracker {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void inc(DuplicateComp dc) {
|
public void inc(DuplicateComp dc) {
|
||||||
inc(dc.qLarger, dc.qSmaller, dc.mismatchP);
|
inc(dc.getQLarger(), dc.getQSmaller(), dc.isMismatchP());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printToStream(PrintStream out, boolean filterUnobserved) {
|
public void printToStream(PrintStream out, boolean filterUnobserved) {
|
||||||
|
|
@ -85,21 +82,6 @@ class QualityTracker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class DuplicateComp {
|
|
||||||
int qLarger;
|
|
||||||
int qSmaller;
|
|
||||||
boolean mismatchP;
|
|
||||||
|
|
||||||
public DuplicateComp( int b1Q, int b2Q, boolean mismatchP ) {
|
|
||||||
qLarger = Math.max(b1Q, b2Q);
|
|
||||||
qSmaller = Math.min(b1Q, b2Q);
|
|
||||||
this.mismatchP = mismatchP;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return String.format("%d %d %b", qLarger, qSmaller, mismatchP);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -118,6 +100,9 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
@Argument(fullName="combinedQuals", shortName="combinedQuals", required=false, doc="Combine and assess pairwise base qualities")
|
@Argument(fullName="combinedQuals", shortName="combinedQuals", required=false, doc="Combine and assess pairwise base qualities")
|
||||||
public boolean COMBINE_QUALS = false;
|
public boolean COMBINE_QUALS = false;
|
||||||
|
|
||||||
|
@Argument(fullName="combineAllDups", shortName="combineAllDups", required=false, defaultValue="false", doc="Combine and assess pairwise base qualities")
|
||||||
|
public boolean COMBINE_ALL_DUPS;
|
||||||
|
|
||||||
final boolean DEBUG = false;
|
final boolean DEBUG = false;
|
||||||
final private boolean ACTUALLY_DO_WORK = true;
|
final private boolean ACTUALLY_DO_WORK = true;
|
||||||
|
|
||||||
|
|
@ -146,18 +131,17 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
return pairwiseComps;
|
return pairwiseComps;
|
||||||
|
|
||||||
if ( COMBINE_QUALS ) {
|
if ( COMBINE_QUALS ) {
|
||||||
Pair<SAMRecord, SAMRecord> combinedReads = combinedReadPair( duplicateReads );
|
Pair<SAMRecord, SAMRecord> combinedReads = DupUtils.combinedReadPair( duplicateReads );
|
||||||
if ( combinedReads != null ) {
|
if ( combinedReads != null ) {
|
||||||
SAMRecord combined1 = combinedReads.first;
|
SAMRecord combined1 = combinedReads.first;
|
||||||
SAMRecord combined2 = combinedReads.second;
|
SAMRecord combined2 = combinedReads.second;
|
||||||
addPairwiseMatches( pairwiseComps, combined1, combined2 );
|
addPairwiseMatches( pairwiseComps, combined1, combined2 );
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
int nComparisons = 0;
|
int nComparisons = 0;
|
||||||
for ( SAMRecord read1 : duplicateReads ) {
|
for ( SAMRecord read1 : duplicateReads ) {
|
||||||
for ( SAMRecord read2 : duplicateReads ) {
|
for ( SAMRecord read2 : duplicateReads ) {
|
||||||
if ( usableDuplicate(read1, read2) ) {
|
if ( DupUtils.usableDuplicate(read1, read2) ) {
|
||||||
nComparisons++;
|
nComparisons++;
|
||||||
addPairwiseMatches( pairwiseComps, read1, read2 );
|
addPairwiseMatches( pairwiseComps, read1, read2 );
|
||||||
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
|
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
|
||||||
|
|
@ -170,10 +154,6 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
return pairwiseComps;
|
return pairwiseComps;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean usableDuplicate( SAMRecord read1, SAMRecord read2 ) {
|
|
||||||
return read1 != read2 && read1.getReadLength() == read2.getReadLength();
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<DuplicateComp> addPairwiseMatches(List<DuplicateComp> comps,
|
private List<DuplicateComp> addPairwiseMatches(List<DuplicateComp> comps,
|
||||||
SAMRecord read1, SAMRecord read2 ) {
|
SAMRecord read1, SAMRecord read2 ) {
|
||||||
byte[] read1Bases = read1.getReadBases();
|
byte[] read1Bases = read1.getReadBases();
|
||||||
|
|
@ -191,94 +171,4 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
|
|
||||||
return comps;
|
return comps;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Pair<SAMRecord, SAMRecord> combinedReadPair( List<SAMRecord> duplicateReads ) {
|
|
||||||
if ( duplicateReads.size() < 4 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
SAMRecord c1 = combineDuplicates(duplicateReads.get(0),duplicateReads.get(1));
|
|
||||||
SAMRecord c2 = combineDuplicates(duplicateReads.get(2),duplicateReads.get(3));
|
|
||||||
return new Pair<SAMRecord, SAMRecord>(c1, c2);
|
|
||||||
}
|
|
||||||
|
|
||||||
private SAMRecord sample3rdRead( List<SAMRecord> duplicateReads, SAMRecord read1, SAMRecord read2 ) {
|
|
||||||
if ( duplicateReads.size() <= 2 ) {
|
|
||||||
// no third unique read is available
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
for ( SAMRecord read3 : duplicateReads ) {
|
|
||||||
if ( usableDuplicate(read1, read3) && usableDuplicate(read2, read3) )
|
|
||||||
return read3;
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private SAMRecord tmpCopyRead(SAMRecord read) {
|
|
||||||
SAMRecord copy = new SAMRecord(read.getHeader());
|
|
||||||
copy.setReadName(read.getReadName());
|
|
||||||
//copy.setReadString(final String value) {
|
|
||||||
copy.setReadBases(read.getReadBases());
|
|
||||||
copy.setBaseQualities(read.getBaseQualities());
|
|
||||||
copy.setReferenceName(read.getReferenceName());
|
|
||||||
copy.setReferenceIndex(read.getReferenceIndex());
|
|
||||||
copy.setMateReferenceName(read.getMateReferenceName());
|
|
||||||
copy.setMateReferenceIndex(read.getMateReferenceIndex());
|
|
||||||
copy.setAlignmentStart(read.getAlignmentStart());
|
|
||||||
//copy.setAlignmentEnd(read.getAlignmentEnd());
|
|
||||||
copy.setMateAlignmentStart(read.getMateAlignmentStart());
|
|
||||||
copy.setInferredInsertSize(read.getInferredInsertSize());
|
|
||||||
copy.setMappingQuality(read.getMappingQuality());
|
|
||||||
copy.setCigar(read.getCigar());
|
|
||||||
copy.setFlags(copy.getFlags());
|
|
||||||
|
|
||||||
return copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
private SAMRecord combineDuplicates(SAMRecord read1, SAMRecord read2) {
|
|
||||||
byte[] read1Bases = read1.getReadBases();
|
|
||||||
byte[] read1Quals = read1.getBaseQualities();
|
|
||||||
byte[] read2Bases = read2.getReadBases();
|
|
||||||
byte[] read2Quals = read2.getBaseQualities();
|
|
||||||
|
|
||||||
byte[] bases = new byte[read1Bases.length];
|
|
||||||
byte[] quals = new byte[read1Bases.length];
|
|
||||||
|
|
||||||
SAMRecord c = tmpCopyRead(read1);
|
|
||||||
for ( int i = 0; i < read1Bases.length; i++) {
|
|
||||||
byte base1 = read1Bases[i];
|
|
||||||
byte base2 = read2Bases[i];
|
|
||||||
byte qual1 = read1Quals[i];
|
|
||||||
byte qual2 = read2Quals[i];
|
|
||||||
final double p1 = QualityUtils.qualToProb(qual1);
|
|
||||||
final double p2 = QualityUtils.qualToProb(qual2);
|
|
||||||
|
|
||||||
double pc;
|
|
||||||
byte basec;
|
|
||||||
|
|
||||||
if ( base1 == base2 ) {
|
|
||||||
// agreement
|
|
||||||
basec = base1;
|
|
||||||
pc = 1 - (1 - p1) * (1 - p2);
|
|
||||||
} else {
|
|
||||||
// disagreement
|
|
||||||
basec = p1 > p2 ? base1 : base2;
|
|
||||||
pc = p1 > p2 ? p1 : p2;
|
|
||||||
//pc = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bases[i] = basec;
|
|
||||||
quals[i] = QualityUtils.probToQual(pc, 0.0);
|
|
||||||
|
|
||||||
if ( DEBUG )
|
|
||||||
logger.debug(String.format("Combining %s (Q%2d) with %s (Q%2d) -> %s (Q%2d)%s%n",
|
|
||||||
(char)base1, qual1, (char)base2, qual2, (char)bases[i], quals[i],
|
|
||||||
base1 == base2 ? "" : " [MISMATCH]"));
|
|
||||||
}
|
|
||||||
c.setReadBases(bases);
|
|
||||||
c.setBaseQualities(quals);
|
|
||||||
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue