Added - PairedQualityScoreCountsWalker: counts quality scores (e.g. as a histogram) on first reads of a pair and second reads of a pair. Turns out there's a consistent difference in quality scores; even after recalibrating without the pair ordering as a covariate (there's a bit of averaging -- but not as much as I initially thought).
Added - A paired read order covariate to use with recalibration. Currently experimental: for instance, what's a proper pair versus just a pair? Nobody should use this one... git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2401 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4f59bfd513
commit
7b5e332ff3
|
|
@ -0,0 +1,28 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: chartl
|
||||||
|
* Date: Dec 16, 2009
|
||||||
|
* Time: 3:22:19 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class PairedReadOrderCovariate implements ExperimentalCovariate{
|
||||||
|
|
||||||
|
public void initialize (final RecalibrationArgumentCollection rac ) { /* do nothing */ }
|
||||||
|
|
||||||
|
public final Comparable getValue(final SAMRecord read, final int offset) {
|
||||||
|
return read.getReadPairedFlag() ? "Not_Paired" : read.getMateUnmappedFlag() ? "Mate_Unmapped" : read.getFirstOfPairFlag() ? "First_Read" : "Second_Read";
|
||||||
|
}
|
||||||
|
|
||||||
|
public final Comparable getValue( final String str ) {
|
||||||
|
return Integer.parseInt( str );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to estimate the amount space required for the full data HashMap
|
||||||
|
public final int estimatedNumberOfBins() {
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,107 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This walker prints out quality score counts for first and second reads of a pair aggregated over all reads
|
||||||
|
* in the interval.
|
||||||
|
*
|
||||||
|
* @Author: Chris Hartl
|
||||||
|
*/
|
||||||
|
public class PairedQualityScoreCountsWalker extends ReadWalker<Pair<byte[],Boolean>,Pair<CycleQualCounts,CycleQualCounts>> {
|
||||||
|
@Argument(fullName="readLength", shortName="rl", doc="Length of reads in the bam file", required=true)
|
||||||
|
public int readLength = -1;
|
||||||
|
|
||||||
|
public void initialize() { return; }
|
||||||
|
|
||||||
|
public Pair<CycleQualCounts,CycleQualCounts> reduceInit() {
|
||||||
|
return new Pair<CycleQualCounts,CycleQualCounts>( new CycleQualCounts(readLength), new CycleQualCounts(readLength) );
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<CycleQualCounts,CycleQualCounts> reduce( Pair<byte[],Boolean> mapCounts, Pair<CycleQualCounts,CycleQualCounts> reduceCounts ) {
|
||||||
|
if ( mapCounts != null ) {
|
||||||
|
if ( mapCounts.second ) {
|
||||||
|
reduceCounts.first.update(mapCounts.first);
|
||||||
|
} else {
|
||||||
|
reduceCounts.second.update(mapCounts.first);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return reduceCounts;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<byte[],Boolean> map( char[] ref, SAMRecord read) {
|
||||||
|
if ( canUseRead(read) ) {
|
||||||
|
return getCorrectlyOrientedBaseQualities(read);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean canUseRead(SAMRecord read) {
|
||||||
|
return ( ! read.getMateUnmappedFlag() && ! read.getReadUnmappedFlag() ) && ( read.getReadPairedFlag() && read.getReadLength() == readLength );
|
||||||
|
}
|
||||||
|
|
||||||
|
private Pair<byte[],Boolean> getCorrectlyOrientedBaseQualities(SAMRecord read) {
|
||||||
|
byte[] quals = read.getReadNegativeStrandFlag() ? BaseUtils.reverse(read.getBaseQualities()) : read.getBaseQualities();
|
||||||
|
return new Pair<byte[], Boolean>(quals, read.getFirstOfPairFlag());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Pair<CycleQualCounts,CycleQualCounts> finalCounts) {
|
||||||
|
StringBuilder output = new StringBuilder();
|
||||||
|
output.append(String.format("%s\t%s\t%s%n","Cycle","First_read_counts","Second_read_counts"));
|
||||||
|
for ( int offset = 0; offset < readLength; offset++ ) {
|
||||||
|
output.append(String.format("%d\t%s\t%s%n",offset,finalCounts.first.getCountDistribution(offset),finalCounts.second.getCountDistribution(offset)));
|
||||||
|
}
|
||||||
|
out.printf("%s",output.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class CycleQualCounts {
|
||||||
|
private long[][] qualityCountsByCycle;
|
||||||
|
private int cycleLength;
|
||||||
|
private int qualMax = QualityUtils.MAX_REASONABLE_Q_SCORE + 1;
|
||||||
|
|
||||||
|
public CycleQualCounts(int cycleLength) {
|
||||||
|
this.cycleLength = cycleLength;
|
||||||
|
qualityCountsByCycle = new long[cycleLength][qualMax];
|
||||||
|
for ( int cycle = 0; cycle < cycleLength; cycle++ ) {
|
||||||
|
for ( int qual = 0; qual < qualMax; qual++) {
|
||||||
|
qualityCountsByCycle[cycle][qual] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update(int offset, byte quality) {
|
||||||
|
qualityCountsByCycle[offset][qualityToQualityIndex(quality)]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void update(byte[] qualArray) {
|
||||||
|
for ( int o = 0; o < cycleLength; o++ ) {
|
||||||
|
update(o,qualArray[o]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int qualityToQualityIndex(byte qual) {
|
||||||
|
return qual < 0 ? 0 : qual > qualMax ? qualMax : qual;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long[][] getCounts() { return qualityCountsByCycle; }
|
||||||
|
|
||||||
|
public String getCountDistribution(int offset) {
|
||||||
|
StringBuilder b = new StringBuilder();
|
||||||
|
for ( int qual = 0; qual < qualMax-1; qual++ ) {
|
||||||
|
b.append(String.format("%d;",qualityCountsByCycle[offset][qual]));
|
||||||
|
}
|
||||||
|
b.append(String.format("%d",qualityCountsByCycle[offset][qualMax-1]));
|
||||||
|
|
||||||
|
return b.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue