Added - PairedQualityScoreCountsWalker: counts quality scores (e.g. as a histogram) separately for the first reads and the second reads of a pair. It turns out there's a consistent difference in quality scores between the two, even after recalibrating without the pair ordering as a covariate (there's a bit of averaging, but not as much as I initially thought).
Added - A paired read order covariate to use with recalibration. Currently experimental: for instance, what's a proper pair versus just a pair? Nobody should use this one... git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2401 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4f59bfd513
commit
7b5e332ff3
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Dec 16, 2009
|
||||
* Time: 3:22:19 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PairedReadOrderCovariate implements ExperimentalCovariate{
|
||||
|
||||
public void initialize (final RecalibrationArgumentCollection rac ) { /* do nothing */ }
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset) {
|
||||
return read.getReadPairedFlag() ? "Not_Paired" : read.getMateUnmappedFlag() ? "Mate_Unmapped" : read.getFirstOfPairFlag() ? "First_Read" : "Second_Read";
|
||||
}
|
||||
|
||||
public final Comparable getValue( final String str ) {
|
||||
return Integer.parseInt( str );
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,107 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* This walker prints out quality score counts for first and second reads of a pair aggregated over all reads
|
||||
* in the interval.
|
||||
*
|
||||
* @Author: Chris Hartl
|
||||
*/
|
||||
public class PairedQualityScoreCountsWalker extends ReadWalker<Pair<byte[],Boolean>,Pair<CycleQualCounts,CycleQualCounts>> {
|
||||
@Argument(fullName="readLength", shortName="rl", doc="Length of reads in the bam file", required=true)
|
||||
public int readLength = -1;
|
||||
|
||||
public void initialize() { return; }
|
||||
|
||||
public Pair<CycleQualCounts,CycleQualCounts> reduceInit() {
|
||||
return new Pair<CycleQualCounts,CycleQualCounts>( new CycleQualCounts(readLength), new CycleQualCounts(readLength) );
|
||||
}
|
||||
|
||||
public Pair<CycleQualCounts,CycleQualCounts> reduce( Pair<byte[],Boolean> mapCounts, Pair<CycleQualCounts,CycleQualCounts> reduceCounts ) {
|
||||
if ( mapCounts != null ) {
|
||||
if ( mapCounts.second ) {
|
||||
reduceCounts.first.update(mapCounts.first);
|
||||
} else {
|
||||
reduceCounts.second.update(mapCounts.first);
|
||||
}
|
||||
}
|
||||
|
||||
return reduceCounts;
|
||||
}
|
||||
|
||||
public Pair<byte[],Boolean> map( char[] ref, SAMRecord read) {
|
||||
if ( canUseRead(read) ) {
|
||||
return getCorrectlyOrientedBaseQualities(read);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean canUseRead(SAMRecord read) {
|
||||
return ( ! read.getMateUnmappedFlag() && ! read.getReadUnmappedFlag() ) && ( read.getReadPairedFlag() && read.getReadLength() == readLength );
|
||||
}
|
||||
|
||||
private Pair<byte[],Boolean> getCorrectlyOrientedBaseQualities(SAMRecord read) {
|
||||
byte[] quals = read.getReadNegativeStrandFlag() ? BaseUtils.reverse(read.getBaseQualities()) : read.getBaseQualities();
|
||||
return new Pair<byte[], Boolean>(quals, read.getFirstOfPairFlag());
|
||||
}
|
||||
|
||||
public void onTraversalDone(Pair<CycleQualCounts,CycleQualCounts> finalCounts) {
|
||||
StringBuilder output = new StringBuilder();
|
||||
output.append(String.format("%s\t%s\t%s%n","Cycle","First_read_counts","Second_read_counts"));
|
||||
for ( int offset = 0; offset < readLength; offset++ ) {
|
||||
output.append(String.format("%d\t%s\t%s%n",offset,finalCounts.first.getCountDistribution(offset),finalCounts.second.getCountDistribution(offset)));
|
||||
}
|
||||
out.printf("%s",output.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class CycleQualCounts {
|
||||
private long[][] qualityCountsByCycle;
|
||||
private int cycleLength;
|
||||
private int qualMax = QualityUtils.MAX_REASONABLE_Q_SCORE + 1;
|
||||
|
||||
public CycleQualCounts(int cycleLength) {
|
||||
this.cycleLength = cycleLength;
|
||||
qualityCountsByCycle = new long[cycleLength][qualMax];
|
||||
for ( int cycle = 0; cycle < cycleLength; cycle++ ) {
|
||||
for ( int qual = 0; qual < qualMax; qual++) {
|
||||
qualityCountsByCycle[cycle][qual] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void update(int offset, byte quality) {
|
||||
qualityCountsByCycle[offset][qualityToQualityIndex(quality)]++;
|
||||
}
|
||||
|
||||
|
||||
public void update(byte[] qualArray) {
|
||||
for ( int o = 0; o < cycleLength; o++ ) {
|
||||
update(o,qualArray[o]);
|
||||
}
|
||||
}
|
||||
|
||||
private int qualityToQualityIndex(byte qual) {
|
||||
return qual < 0 ? 0 : qual > qualMax ? qualMax : qual;
|
||||
}
|
||||
|
||||
public long[][] getCounts() { return qualityCountsByCycle; }
|
||||
|
||||
public String getCountDistribution(int offset) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
for ( int qual = 0; qual < qualMax-1; qual++ ) {
|
||||
b.append(String.format("%d;",qualityCountsByCycle[offset][qual]));
|
||||
}
|
||||
b.append(String.format("%d",qualityCountsByCycle[offset][qualMax-1]));
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue