CountCovariates now uses any rod of type VariationRod with the name dbsnp as the source of known variant sites to skip over. It also grabs the platform string out of the read group when deciding which algorithm to use to calculate machine cycle. In this way it can now handle multi-platform bams. I added a new covariate: PositionCovariate. This is simply the offset regardless of which platform the read came from. This will be useful for comparing between the two covariates. Finally, this message serves as a warning that I will be killing the old recalibrator tomorrow after I've updated and verified new integration tests.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2077 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f667bed7fc
commit
0fbd81766b
|
|
@ -38,7 +38,7 @@ import net.sf.samtools.SAMRecord;
|
|||
*/
|
||||
|
||||
public interface Covariate {
|
||||
public Comparable getValue(SAMRecord read, int offset, String readGroup, byte[] quals, byte[] bases); // used to pick out the value from attributes of the read
|
||||
public Comparable getValue(SAMRecord read, int offset, String readGroup, String platform, byte[] quals, byte[] bases); // used to pick out the value from attributes of the read
|
||||
public Comparable getValue(String str); // used to get value from input file
|
||||
public int estimatedNumberOfBins(); // used to estimate the amount space required for the HashMap
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,11 +3,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -70,8 +74,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private String[] COVARIATES = null;
|
||||
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
|
||||
private boolean USE_ORIGINAL_QUALS = false;
|
||||
@Argument(fullName = "platform", shortName="pl", doc="Which sequencing technology was used? This is important for the cycle covariate. Options are SLX, 454, and SOLID.", required=false)
|
||||
private String PLATFORM = "SLX";
|
||||
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false)
|
||||
private int WINDOW_SIZE = 3;
|
||||
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
|
||||
|
|
@ -114,10 +116,17 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
}
|
||||
|
||||
// Warn the user if no dbSNP file was specified
|
||||
if( this.getToolkit().getArguments().DBSNPFile == null ) {
|
||||
boolean foundDBSNP = false;
|
||||
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
|
||||
if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) {
|
||||
foundDBSNP = true;
|
||||
}
|
||||
}
|
||||
if( !foundDBSNP ) {
|
||||
Utils.warnUser("This calculation is critically dependent on being able to skip over known variant sites. Are you sure you want to be running without a dbSNP rod specified?");
|
||||
}
|
||||
|
||||
|
||||
// Initialize the requested covariates by parsing the -cov argument
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one
|
||||
|
|
@ -130,9 +139,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
// Some covariates need parameters (user supplied command line arguments) passed to them
|
||||
if( covariate instanceof CycleCovariate ) { covariate = new CycleCovariate( PLATFORM ); }
|
||||
else if( covariate instanceof PrimerRoundCovariate ) { covariate = new PrimerRoundCovariate( PLATFORM ); }
|
||||
else if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
if( !( covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate ) ) { // these were already added so don't add them again
|
||||
requestedCovariates.add( covariate );
|
||||
}
|
||||
|
|
@ -160,9 +167,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
// Some covariates need parameters (user supplied command line arguments) passed to them
|
||||
if( covariate instanceof CycleCovariate ) { covariate = new CycleCovariate( PLATFORM ); }
|
||||
else if( covariate instanceof PrimerRoundCovariate ) { covariate = new PrimerRoundCovariate( PLATFORM ); }
|
||||
else if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
requestedCovariates.add( covariate );
|
||||
} catch ( InstantiationException e ) {
|
||||
throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) );
|
||||
|
|
@ -209,11 +214,21 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
*/
|
||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
|
||||
final rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP( tracker.getTrackData("dbsnp", null) );
|
||||
// pull out anything passed by -B name,type,file that has the name "dbsnp"
|
||||
final RODRecordList<ReferenceOrderedDatum> dbsnpRODs = tracker.getTrackData( "dbsnp", null );
|
||||
boolean isSNP = false;
|
||||
if (dbsnpRODs != null) {
|
||||
for( ReferenceOrderedDatum rod : dbsnpRODs ) {
|
||||
if( ((Variation)rod).isSNP() ) {
|
||||
isSNP = true; // at least one of the rods says this is a snp site
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Only use data from non-dbsnp sites
|
||||
// Assume every mismatch at a non-dbsnp site is indicitive of poor quality
|
||||
if( dbsnp == null ) {
|
||||
if( !isSNP ) {
|
||||
final List<SAMRecord> reads = context.getReads();
|
||||
final List<Integer> offsets = context.getOffsets();
|
||||
SAMRecord read;
|
||||
|
|
@ -223,6 +238,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
byte[] bases;
|
||||
byte refBase;
|
||||
byte prevBase;
|
||||
String platform;
|
||||
byte[] colorSpaceQuals;
|
||||
|
||||
final int numReads = reads.size();
|
||||
// For each read at this locus
|
||||
|
|
@ -251,30 +268,33 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
}
|
||||
}
|
||||
|
||||
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
|
||||
byte[] colorSpaceQuals = null;
|
||||
if( PLATFORM.equalsIgnoreCase("SOLID") ) {
|
||||
colorSpaceQuals = (byte[])read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
||||
}
|
||||
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
|
||||
{
|
||||
// skip if base quality is zero
|
||||
if( quals[offset] > 0 ) {
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
|
||||
refBase = (byte)ref.getBase();
|
||||
prevBase = bases[offset-1];
|
||||
// skip if base quality is zero
|
||||
if( quals[offset] > 0 ) {
|
||||
bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A'
|
||||
refBase = (byte)ref.getBase();
|
||||
prevBase = bases[offset-1];
|
||||
|
||||
// Get the complement base strand if we are a negative strand read
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
|
||||
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
|
||||
prevBase = bases[offset+1];
|
||||
// Get the complement base strand if we are a negative strand read
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
bases = BaseUtils.simpleComplement( bases ); // this is an expensive call
|
||||
refBase = (byte)BaseUtils.simpleComplement( ref.getBase() );
|
||||
prevBase = bases[offset+1];
|
||||
}
|
||||
|
||||
// skip if this base or the previous one was an 'N' or etc.
|
||||
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
|
||||
|
||||
readGroup = read.getReadGroup().getReadGroupId(); // this is an expensive call
|
||||
platform = read.getReadGroup().getPlatform(); // this is an expensive call
|
||||
|
||||
// SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them
|
||||
colorSpaceQuals = null;
|
||||
if( platform.equalsIgnoreCase("SOLID") ) {
|
||||
colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
|
||||
}
|
||||
|
||||
// skip if this base or the previous one was an 'N' or etc.
|
||||
if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) {
|
||||
readGroup = read.getReadGroup().getReadGroupId(); // this is an expensive call
|
||||
updateDataFromRead( read, offset, readGroup, quals, bases, refBase );
|
||||
if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet
|
||||
{
|
||||
updateDataFromRead( read, offset, readGroup, platform, quals, bases, refBase );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -303,13 +323,14 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
* @param bases The bases which make up the read
|
||||
* @param refBase The reference base at this locus
|
||||
*/
|
||||
private void updateDataFromRead(final SAMRecord read, final int offset, final String readGroup, final byte[] quals, final byte[] bases, final byte refBase) {
|
||||
private void updateDataFromRead(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases, final byte refBase) {
|
||||
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
|
||||
// Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
key.add( covariate.getValue( read, offset, readGroup, quals, bases ) );
|
||||
key.add( covariate.getValue( read, offset, readGroup, platform, quals, bases ) );
|
||||
}
|
||||
|
||||
// Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap
|
||||
|
|
|
|||
|
|
@ -44,25 +44,18 @@ import net.sf.samtools.SAMRecord;
|
|||
|
||||
public class CycleCovariate implements Covariate {
|
||||
|
||||
private String platform;
|
||||
|
||||
public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
platform = "SLX"; // Solexa is the default
|
||||
}
|
||||
|
||||
public CycleCovariate(final String _platform) {
|
||||
platform = _platform;
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup,
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
if( platform.equalsIgnoreCase( "SLX" ) ) {
|
||||
int cycle = offset;
|
||||
if( platform.equalsIgnoreCase( "ILLUMINA" ) ) {
|
||||
int cycle = offset;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = bases.length - (offset + 1);
|
||||
}
|
||||
return cycle;
|
||||
} else if( platform.equalsIgnoreCase( "454" ) ) {
|
||||
} else if( platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454"
|
||||
int cycle = 0;
|
||||
byte prevBase = bases[0];
|
||||
for( int iii = 1; iii <= offset; iii++ ) {
|
||||
|
|
@ -76,7 +69,7 @@ public class CycleCovariate implements Covariate {
|
|||
// the ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
return offset / 5; // integer division
|
||||
} else {
|
||||
throw new StingException( "Requested platform (" + platform + ") not supported in CycleCovariate." );
|
||||
throw new StingException( "Platform in read (" + platform + ") is not supported in CycleCovariate. Read = " + read );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class DinucCovariate implements Covariate {
|
|||
}
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup,
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
|
||||
byte base = bases[offset];
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class MappingQualityCovariate implements Covariate {
|
|||
public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup,
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
|
||||
return read.getMappingQuality();
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public class MinimumNQSCovariate implements Covariate {
|
|||
windowReach = windowSize / 2; // integer division
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup,
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
|
||||
// Loop over the list of base quality scores in the window and find the minimum
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 18, 2009
|
||||
*
|
||||
* The Position covariate. It is a lot like the Cycle covariate except it always returns the offset regardless of which platform the read came from.
|
||||
* This is the Solexa definition of machine cycle and the covariate that was always being used in the original version of the recalibrator.
|
||||
*/
|
||||
|
||||
public class PositionCovariate implements Covariate {
|
||||
|
||||
public PositionCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
int cycle = offset;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = bases.length - (offset + 1);
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
|
||||
public final Comparable getValue(final String str) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "Position in Read";
|
||||
}
|
||||
}
|
||||
|
|
@ -41,26 +41,15 @@ import net.sf.samtools.SAMRecord;
|
|||
|
||||
public class PrimerRoundCovariate implements Covariate {
|
||||
|
||||
private String platform;
|
||||
|
||||
public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
platform = "SLX";
|
||||
public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public PrimerRoundCovariate(final String _platform) {
|
||||
platform = _platform;
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup,
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
if( platform.equalsIgnoreCase( "SLX" ) ) {
|
||||
return 1; // nothing to do here because it is always the same
|
||||
} else if( platform.equalsIgnoreCase( "454" ) ) {
|
||||
return 1; // nothing to do here because it is always the same
|
||||
} else if( platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
if( platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
} else {
|
||||
throw new StingException( "Requested platform (" + platform + ") not supported in PrimerRoundCovariate." );
|
||||
return 1; // nothing to do here because it is always the same
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class QualityScoreCovariate implements Covariate {
|
|||
public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup,
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
|
||||
return (int)quals[offset];
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class ReadGroupCovariate implements Covariate{
|
|||
public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup,
|
||||
public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform,
|
||||
final byte[] quals, final byte[] bases) {
|
||||
return readGroup;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
|
|
@ -69,8 +70,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
||||
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
|
||||
private boolean USE_ORIGINAL_QUALS = false;
|
||||
@Argument(fullName = "platform", shortName="pl", doc="Which sequencing technology was used? This is important for the cycle covariate. Options are SLX, 454, and SOLID.", required=false)
|
||||
private String PLATFORM = "SLX";
|
||||
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false)
|
||||
private int WINDOW_SIZE = 3;
|
||||
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
|
||||
|
|
@ -114,10 +113,20 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
boolean foundAllCovariates = false;
|
||||
int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one
|
||||
|
||||
// Warn the user if a dbSNP file was specified since it isn't being used here
|
||||
boolean foundDBSNP = false;
|
||||
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
|
||||
if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) {
|
||||
foundDBSNP = true;
|
||||
}
|
||||
}
|
||||
if( foundDBSNP ) {
|
||||
Utils.warnUser("A dbSNP rod file was specified but this walker doesn't make use of it.");
|
||||
}
|
||||
|
||||
// Read in the covariates that were used from the input file
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
|
||||
|
||||
// Read in the data from the csv file and populate the map
|
||||
logger.info( "Reading in the data from input file..." );
|
||||
|
||||
|
|
@ -139,9 +148,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
try {
|
||||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
// some covariates need parameters (user supplied command line arguments) passed to them
|
||||
if( covariate instanceof CycleCovariate ) { covariate = new CycleCovariate( PLATFORM ); }
|
||||
else if( covariate instanceof PrimerRoundCovariate ) { covariate = new PrimerRoundCovariate( PLATFORM ); }
|
||||
else if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
requestedCovariates.add( covariate );
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
|
||||
|
|
@ -243,6 +250,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
// These calls are expensive so only do them once for each read
|
||||
String readGroup = read.getReadGroup().getReadGroupId();
|
||||
String platform = read.getReadGroup().getPlatform();
|
||||
byte[] bases = read.getReadBases();
|
||||
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
|
|
@ -254,7 +262,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
for( int iii = 1; iii < read.getReadLength() - 1; iii++ ) { // skip first and last bases because there is no dinuc
|
||||
List<Comparable> key = new ArrayList<Comparable>();
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
key.add( covariate.getValue( read, iii, readGroup, originalQuals, bases ) ); // offset is zero based so passing iii is correct here
|
||||
key.add( covariate.getValue( read, iii, readGroup, platform, originalQuals, bases ) ); // offset is zero based so passing iii is correct here
|
||||
}
|
||||
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
|
|
@ -275,8 +283,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
|
||||
|
||||
// SOLID bams insert the reference base into the read if the color space quality is zero, so don't change their base quality scores
|
||||
if( PLATFORM.equalsIgnoreCase("SOLID") ) {
|
||||
byte[] colorSpaceQuals = (byte[])read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
||||
if( platform.equalsIgnoreCase("SOLID") ) {
|
||||
byte[] colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
|
||||
if(colorSpaceQuals != null) { preserveBadColorSpaceQualities_SOLID( originalQuals, recalQuals, colorSpaceQuals ); }
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue