More information from the duplicate combiner quality metrics
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@590 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2b6466ea00
commit
8925df2e1e
|
|
@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -48,7 +50,7 @@ class MismatchCounter {
|
||||||
}
|
}
|
||||||
|
|
||||||
class QualityTracker {
|
class QualityTracker {
|
||||||
final private int MAX_QUAL_SCORE = 50;
|
final private int MAX_QUAL_SCORE = 100;
|
||||||
MismatchCounter[][] mismatchesByQ = new MismatchCounter[MAX_QUAL_SCORE][MAX_QUAL_SCORE];
|
MismatchCounter[][] mismatchesByQ = new MismatchCounter[MAX_QUAL_SCORE][MAX_QUAL_SCORE];
|
||||||
|
|
||||||
public QualityTracker() {
|
public QualityTracker() {
|
||||||
|
|
@ -113,6 +115,10 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
@Argument(fullName="maxPairwiseCompsPerDupSet", shortName="maxPairwiseCompsPerDupSet", required=false, defaultValue="100", doc="Maximumize number of pairwise comparisons to perform among duplicate read sets")
|
@Argument(fullName="maxPairwiseCompsPerDupSet", shortName="maxPairwiseCompsPerDupSet", required=false, defaultValue="100", doc="Maximumize number of pairwise comparisons to perform among duplicate read sets")
|
||||||
public int MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET;
|
public int MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET;
|
||||||
|
|
||||||
|
@Argument(fullName="combinedQuals", shortName="combinedQuals", required=false, defaultValue="false", doc="Combine and assess pairwise base qualities")
|
||||||
|
public boolean COMBINE_QUALS;
|
||||||
|
|
||||||
|
final boolean DEBUG = false;
|
||||||
final private boolean ACTUALLY_DO_WORK = true;
|
final private boolean ACTUALLY_DO_WORK = true;
|
||||||
|
|
||||||
public void onTraversalDone(QualityTracker result) {
|
public void onTraversalDone(QualityTracker result) {
|
||||||
|
|
@ -134,38 +140,145 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
|
||||||
// Print out data for regression
|
// Print out data for regression
|
||||||
public List<DuplicateComp> map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads) {
|
public List<DuplicateComp> map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads) {
|
||||||
//out.printf("%s has %d duplicates%n", loc, duplicateReads.size());
|
//out.printf("%s has %d duplicates%n", loc, duplicateReads.size());
|
||||||
List<DuplicateComp> comps = new ArrayList<DuplicateComp>();
|
List<DuplicateComp> pairwiseComps = new ArrayList<DuplicateComp>();
|
||||||
|
|
||||||
if ( ! ACTUALLY_DO_WORK )
|
if ( ! ACTUALLY_DO_WORK )
|
||||||
return comps;
|
return pairwiseComps;
|
||||||
|
|
||||||
int nComparisons = 0;
|
if ( COMBINE_QUALS ) {
|
||||||
for ( SAMRecord read1 : duplicateReads ) {
|
Pair<SAMRecord, SAMRecord> combinedReads = combinedReadPair( duplicateReads );
|
||||||
byte[] read1Bases = read1.getReadBases();
|
if ( combinedReads != null ) {
|
||||||
byte[] read1Quals = read1.getBaseQualities();
|
SAMRecord combined1 = combinedReads.first;
|
||||||
|
SAMRecord combined2 = combinedReads.second;
|
||||||
for ( SAMRecord read2 : duplicateReads ) {
|
addPairwiseMatches( pairwiseComps, combined1, combined2 );
|
||||||
if ( read1 != read2 && read1.getReadLength() == read2.getReadLength()) {
|
}
|
||||||
byte[] read2Bases = read2.getReadBases();
|
}
|
||||||
byte[] read2Quals = read2.getBaseQualities();
|
else {
|
||||||
nComparisons++;
|
int nComparisons = 0;
|
||||||
|
for ( SAMRecord read1 : duplicateReads ) {
|
||||||
for ( int i = 0; i < read1Bases.length; i++) {
|
for ( SAMRecord read2 : duplicateReads ) {
|
||||||
byte read1Q = read1Quals[i];
|
if ( usableDuplicate(read1, read2) ) {
|
||||||
byte read2Q = read2Quals[i];
|
nComparisons++;
|
||||||
boolean mismatchP = read1Bases[i] != read2Bases[i];
|
addPairwiseMatches( pairwiseComps, read1, read2 );
|
||||||
DuplicateComp dc = new DuplicateComp(read1Q, read2Q, mismatchP);
|
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
|
||||||
|
break;
|
||||||
//logger.debug(String.format("dc: %s", dc));
|
|
||||||
comps.add(dc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return pairwiseComps;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean usableDuplicate( SAMRecord read1, SAMRecord read2 ) {
|
||||||
|
return read1 != read2 && read1.getReadLength() == read2.getReadLength();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<DuplicateComp> addPairwiseMatches(List<DuplicateComp> comps,
|
||||||
|
SAMRecord read1, SAMRecord read2 ) {
|
||||||
|
byte[] read1Bases = read1.getReadBases();
|
||||||
|
byte[] read1Quals = read1.getBaseQualities();
|
||||||
|
byte[] read2Bases = read2.getReadBases();
|
||||||
|
byte[] read2Quals = read2.getBaseQualities();
|
||||||
|
|
||||||
|
for ( int i = 0; i < read1Bases.length; i++) {
|
||||||
|
byte qual1 = read1Quals[i];
|
||||||
|
byte qual2 = read2Quals[i];
|
||||||
|
boolean mismatchP = read1Bases[i] != read2Bases[i];
|
||||||
|
DuplicateComp dc = new DuplicateComp(qual1, qual2, mismatchP);
|
||||||
|
comps.add(dc);
|
||||||
|
}
|
||||||
|
|
||||||
return comps;
|
return comps;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Pair<SAMRecord, SAMRecord> combinedReadPair( List<SAMRecord> duplicateReads ) {
|
||||||
|
if ( duplicateReads.size() < 4 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
SAMRecord c1 = combineDuplicates(duplicateReads.get(0),duplicateReads.get(1));
|
||||||
|
SAMRecord c2 = combineDuplicates(duplicateReads.get(2),duplicateReads.get(3));
|
||||||
|
return new Pair<SAMRecord, SAMRecord>(c1, c2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private SAMRecord sample3rdRead( List<SAMRecord> duplicateReads, SAMRecord read1, SAMRecord read2 ) {
|
||||||
|
if ( duplicateReads.size() <= 2 ) {
|
||||||
|
// no third unique read is available
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
for ( SAMRecord read3 : duplicateReads ) {
|
||||||
|
if ( usableDuplicate(read1, read3) && usableDuplicate(read2, read3) )
|
||||||
|
return read3;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private SAMRecord tmpCopyRead(SAMRecord read) {
|
||||||
|
SAMRecord copy = new SAMRecord(read.getHeader());
|
||||||
|
copy.setReadName(read.getReadName());
|
||||||
|
//copy.setReadString(final String value) {
|
||||||
|
copy.setReadBases(read.getReadBases());
|
||||||
|
copy.setBaseQualities(read.getBaseQualities());
|
||||||
|
copy.setReferenceName(read.getReferenceName());
|
||||||
|
copy.setReferenceIndex(read.getReferenceIndex());
|
||||||
|
copy.setMateReferenceName(read.getMateReferenceName());
|
||||||
|
copy.setMateReferenceIndex(read.getMateReferenceIndex());
|
||||||
|
copy.setAlignmentStart(read.getAlignmentStart());
|
||||||
|
//copy.setAlignmentEnd(read.getAlignmentEnd());
|
||||||
|
copy.setMateAlignmentStart(read.getMateAlignmentStart());
|
||||||
|
copy.setInferredInsertSize(read.getInferredInsertSize());
|
||||||
|
copy.setMappingQuality(read.getMappingQuality());
|
||||||
|
copy.setCigar(read.getCigar());
|
||||||
|
copy.setFlags(copy.getFlags());
|
||||||
|
|
||||||
|
return copy;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SAMRecord combineDuplicates(SAMRecord read1, SAMRecord read2) {
|
||||||
|
byte[] read1Bases = read1.getReadBases();
|
||||||
|
byte[] read1Quals = read1.getBaseQualities();
|
||||||
|
byte[] read2Bases = read2.getReadBases();
|
||||||
|
byte[] read2Quals = read2.getBaseQualities();
|
||||||
|
|
||||||
|
byte[] bases = new byte[read1Bases.length];
|
||||||
|
byte[] quals = new byte[read1Bases.length];
|
||||||
|
|
||||||
|
SAMRecord c = tmpCopyRead(read1);
|
||||||
|
for ( int i = 0; i < read1Bases.length; i++) {
|
||||||
|
byte base1 = read1Bases[i];
|
||||||
|
byte base2 = read2Bases[i];
|
||||||
|
byte qual1 = read1Quals[i];
|
||||||
|
byte qual2 = read2Quals[i];
|
||||||
|
final double p1 = QualityUtils.qualToProb(qual1);
|
||||||
|
final double p2 = QualityUtils.qualToProb(qual2);
|
||||||
|
|
||||||
|
double pc;
|
||||||
|
byte basec;
|
||||||
|
|
||||||
|
if ( base1 == base2 ) {
|
||||||
|
// agreement
|
||||||
|
basec = base1;
|
||||||
|
pc = 1 - (1 - p1) * (1 - p2);
|
||||||
|
} else {
|
||||||
|
// disagreement
|
||||||
|
basec = p1 > p2 ? base1 : base2;
|
||||||
|
pc = p1 > p2 ? p1 : p2;
|
||||||
|
//pc = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bases[i] = basec;
|
||||||
|
quals[i] = QualityUtils.probToQual(pc, 0.0);
|
||||||
|
|
||||||
|
if ( DEBUG )
|
||||||
|
logger.debug(String.format("Combining %s (Q%2d) with %s (Q%2d) -> %s (Q%2d)%s%n",
|
||||||
|
(char)base1, qual1, (char)base2, qual2, (char)bases[i], quals[i],
|
||||||
|
base1 == base2 ? "" : " [MISMATCH]"));
|
||||||
|
}
|
||||||
|
c.setReadBases(bases);
|
||||||
|
c.setBaseQualities(quals);
|
||||||
|
|
||||||
|
return c;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue