More information from the duplicate combiner quality metrics

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@590 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-05-04 21:51:01 +00:00
parent 2b6466ea00
commit 8925df2e1e
1 changed files with 139 additions and 26 deletions

View File

@ -7,6 +7,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.List;
@ -48,7 +50,7 @@ class MismatchCounter {
}
class QualityTracker {
final private int MAX_QUAL_SCORE = 50;
final private int MAX_QUAL_SCORE = 100;
MismatchCounter[][] mismatchesByQ = new MismatchCounter[MAX_QUAL_SCORE][MAX_QUAL_SCORE];
public QualityTracker() {
@ -113,6 +115,10 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
@Argument(fullName="maxPairwiseCompsPerDupSet", shortName="maxPairwiseCompsPerDupSet", required=false, defaultValue="100", doc="Maximumize number of pairwise comparisons to perform among duplicate read sets")
public int MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET;
@Argument(fullName="combinedQuals", shortName="combinedQuals", required=false, defaultValue="false", doc="Combine and assess pairwise base qualities")
public boolean COMBINE_QUALS;
final boolean DEBUG = false;
final private boolean ACTUALLY_DO_WORK = true;
public void onTraversalDone(QualityTracker result) {
@ -134,38 +140,145 @@ public class DuplicateQualsWalker extends DuplicateWalker<List<DuplicateComp>, Q
// Print out data for regression
public List<DuplicateComp> map(GenomeLoc loc, byte[] refBases, LocusContext context, List<SAMRecord> duplicateReads) {
//out.printf("%s has %d duplicates%n", loc, duplicateReads.size());
List<DuplicateComp> comps = new ArrayList<DuplicateComp>();
List<DuplicateComp> pairwiseComps = new ArrayList<DuplicateComp>();
if ( ! ACTUALLY_DO_WORK )
return comps;
return pairwiseComps;
int nComparisons = 0;
for ( SAMRecord read1 : duplicateReads ) {
byte[] read1Bases = read1.getReadBases();
byte[] read1Quals = read1.getBaseQualities();
for ( SAMRecord read2 : duplicateReads ) {
if ( read1 != read2 && read1.getReadLength() == read2.getReadLength()) {
byte[] read2Bases = read2.getReadBases();
byte[] read2Quals = read2.getBaseQualities();
nComparisons++;
for ( int i = 0; i < read1Bases.length; i++) {
byte read1Q = read1Quals[i];
byte read2Q = read2Quals[i];
boolean mismatchP = read1Bases[i] != read2Bases[i];
DuplicateComp dc = new DuplicateComp(read1Q, read2Q, mismatchP);
//logger.debug(String.format("dc: %s", dc));
comps.add(dc);
if ( COMBINE_QUALS ) {
Pair<SAMRecord, SAMRecord> combinedReads = combinedReadPair( duplicateReads );
if ( combinedReads != null ) {
SAMRecord combined1 = combinedReads.first;
SAMRecord combined2 = combinedReads.second;
addPairwiseMatches( pairwiseComps, combined1, combined2 );
}
}
else {
int nComparisons = 0;
for ( SAMRecord read1 : duplicateReads ) {
for ( SAMRecord read2 : duplicateReads ) {
if ( usableDuplicate(read1, read2) ) {
nComparisons++;
addPairwiseMatches( pairwiseComps, read1, read2 );
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
break;
}
if ( nComparisons > MAX_PAIRSIZE_COMPS_PER_DUPLICATE_SET )
break;
}
}
}
return pairwiseComps;
}
private boolean usableDuplicate( SAMRecord read1, SAMRecord read2 ) {
return read1 != read2 && read1.getReadLength() == read2.getReadLength();
}
private List<DuplicateComp> addPairwiseMatches(List<DuplicateComp> comps,
SAMRecord read1, SAMRecord read2 ) {
byte[] read1Bases = read1.getReadBases();
byte[] read1Quals = read1.getBaseQualities();
byte[] read2Bases = read2.getReadBases();
byte[] read2Quals = read2.getBaseQualities();
for ( int i = 0; i < read1Bases.length; i++) {
byte qual1 = read1Quals[i];
byte qual2 = read2Quals[i];
boolean mismatchP = read1Bases[i] != read2Bases[i];
DuplicateComp dc = new DuplicateComp(qual1, qual2, mismatchP);
comps.add(dc);
}
return comps;
}
private Pair<SAMRecord, SAMRecord> combinedReadPair( List<SAMRecord> duplicateReads ) {
if ( duplicateReads.size() < 4 )
return null;
SAMRecord c1 = combineDuplicates(duplicateReads.get(0),duplicateReads.get(1));
SAMRecord c2 = combineDuplicates(duplicateReads.get(2),duplicateReads.get(3));
return new Pair<SAMRecord, SAMRecord>(c1, c2);
}
private SAMRecord sample3rdRead( List<SAMRecord> duplicateReads, SAMRecord read1, SAMRecord read2 ) {
if ( duplicateReads.size() <= 2 ) {
// no third unique read is available
return null;
} else {
for ( SAMRecord read3 : duplicateReads ) {
if ( usableDuplicate(read1, read3) && usableDuplicate(read2, read3) )
return read3;
}
return null;
}
}
private SAMRecord tmpCopyRead(SAMRecord read) {
SAMRecord copy = new SAMRecord(read.getHeader());
copy.setReadName(read.getReadName());
//copy.setReadString(final String value) {
copy.setReadBases(read.getReadBases());
copy.setBaseQualities(read.getBaseQualities());
copy.setReferenceName(read.getReferenceName());
copy.setReferenceIndex(read.getReferenceIndex());
copy.setMateReferenceName(read.getMateReferenceName());
copy.setMateReferenceIndex(read.getMateReferenceIndex());
copy.setAlignmentStart(read.getAlignmentStart());
//copy.setAlignmentEnd(read.getAlignmentEnd());
copy.setMateAlignmentStart(read.getMateAlignmentStart());
copy.setInferredInsertSize(read.getInferredInsertSize());
copy.setMappingQuality(read.getMappingQuality());
copy.setCigar(read.getCigar());
copy.setFlags(copy.getFlags());
return copy;
}
private SAMRecord combineDuplicates(SAMRecord read1, SAMRecord read2) {
byte[] read1Bases = read1.getReadBases();
byte[] read1Quals = read1.getBaseQualities();
byte[] read2Bases = read2.getReadBases();
byte[] read2Quals = read2.getBaseQualities();
byte[] bases = new byte[read1Bases.length];
byte[] quals = new byte[read1Bases.length];
SAMRecord c = tmpCopyRead(read1);
for ( int i = 0; i < read1Bases.length; i++) {
byte base1 = read1Bases[i];
byte base2 = read2Bases[i];
byte qual1 = read1Quals[i];
byte qual2 = read2Quals[i];
final double p1 = QualityUtils.qualToProb(qual1);
final double p2 = QualityUtils.qualToProb(qual2);
double pc;
byte basec;
if ( base1 == base2 ) {
// agreement
basec = base1;
pc = 1 - (1 - p1) * (1 - p2);
} else {
// disagreement
basec = p1 > p2 ? base1 : base2;
pc = p1 > p2 ? p1 : p2;
//pc = 0;
}
bases[i] = basec;
quals[i] = QualityUtils.probToQual(pc, 0.0);
if ( DEBUG )
logger.debug(String.format("Combining %s (Q%2d) with %s (Q%2d) -> %s (Q%2d)%s%n",
(char)base1, qual1, (char)base2, qual2, (char)bases[i], quals[i],
base1 == base2 ? "" : " [MISMATCH]"));
}
c.setReadBases(bases);
c.setBaseQualities(quals);
return c;
}
}