diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java index 7ac202ce6..af454e946 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/Covariate.java @@ -38,7 +38,7 @@ import net.sf.samtools.SAMRecord; */ public interface Covariate { - public Comparable getValue(SAMRecord read, int offset, String readGroup, byte[] quals, byte[] bases); // used to pick out the value from attributes of the read + public Comparable getValue(SAMRecord read, int offset, String readGroup, String platform, byte[] quals, byte[] bases); // used to pick out the value from attributes of the read public Comparable getValue(String str); // used to get value from input file public int estimatedNumberOfBins(); // used to estimate the amount space required for the HashMap } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java index 22f51474c..ddd8e885f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CovariateCounterWalker.java @@ -3,11 +3,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.RODRecordList; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.Variation; import java.io.PrintStream; import java.io.FileNotFoundException; @@ -70,8 +74,6 @@ public class CovariateCounterWalker extends LocusWalker { private String[] COVARIATES = null; @Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false) private boolean USE_ORIGINAL_QUALS = false; - @Argument(fullName = "platform", shortName="pl", doc="Which sequencing technology was used? This is important for the cycle covariate. Options are SLX, 454, and SOLID.", required=false) - private String PLATFORM = "SLX"; @Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false) private int WINDOW_SIZE = 3; @Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file") @@ -114,10 +116,17 @@ public class CovariateCounterWalker extends LocusWalker { } // Warn the user if no dbSNP file was specified - if( this.getToolkit().getArguments().DBSNPFile == null ) { + boolean foundDBSNP = false; + for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) { + if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) { + foundDBSNP = true; + } + } + if( !foundDBSNP ) { Utils.warnUser("This calculation is critically dependent on being able to skip over known variant sites. Are you sure you want to be running without a dbSNP rod specified?"); } + // Initialize the requested covariates by parsing the -cov argument requestedCovariates = new ArrayList(); int estimatedCapacity = 1; // capacity is multiplicitive so this starts at one @@ -130,9 +139,7 @@ public class CovariateCounterWalker extends LocusWalker { Covariate covariate = (Covariate)covClass.newInstance(); estimatedCapacity *= covariate.estimatedNumberOfBins(); // Some covariates need parameters (user supplied command line arguments) passed to them - if( covariate instanceof CycleCovariate ) { covariate = new CycleCovariate( PLATFORM ); } - else if( covariate instanceof PrimerRoundCovariate ) { covariate = new PrimerRoundCovariate( PLATFORM ); } - else if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); } + if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); } if( !( covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate ) ) { // these were already added so don't add them again requestedCovariates.add( covariate ); } @@ -160,9 +167,7 @@ public class CovariateCounterWalker extends LocusWalker { Covariate covariate = (Covariate)covClass.newInstance(); estimatedCapacity *= covariate.estimatedNumberOfBins(); // Some covariates need parameters (user supplied command line arguments) passed to them - if( covariate instanceof CycleCovariate ) { covariate = new CycleCovariate( PLATFORM ); } - else if( covariate instanceof PrimerRoundCovariate ) { covariate = new PrimerRoundCovariate( PLATFORM ); } - else if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); } + if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); } requestedCovariates.add( covariate ); } catch ( InstantiationException e ) { throw new StingException( String.format("Can not instantiate covariate class '%s': must be concrete class.", covClass.getSimpleName()) ); @@ -209,11 +214,21 @@ public class CovariateCounterWalker extends LocusWalker { */ public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { - final rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP( tracker.getTrackData("dbsnp", null) ); + // pull out anything passed by -B name,type,file that has the name "dbsnp" + final RODRecordList dbsnpRODs = tracker.getTrackData( "dbsnp", null ); + boolean isSNP = false; + if (dbsnpRODs != null) { + for( ReferenceOrderedDatum rod : dbsnpRODs ) { + if( ((Variation)rod).isSNP() ) { + isSNP = true; // at least one of the rods says this is a snp site + break; + } + } + } // Only use data from non-dbsnp sites // Assume every mismatch at a non-dbsnp site is indicitive of poor quality - if( dbsnp == null ) { + if( !isSNP ) { final List reads = context.getReads(); final List offsets = context.getOffsets(); SAMRecord read; @@ -223,6 +238,8 @@ public class CovariateCounterWalker extends LocusWalker { byte[] bases; byte refBase; byte prevBase; + String platform; + byte[] colorSpaceQuals; final int numReads = reads.size(); // For each read at this locus @@ -251,30 +268,33 @@ public class CovariateCounterWalker extends LocusWalker { } } - // SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them - byte[] colorSpaceQuals = null; - if( PLATFORM.equalsIgnoreCase("SOLID") ) { - colorSpaceQuals = (byte[])read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG); - } - if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet - { - // skip if base quality is zero - if( quals[offset] > 0 ) { - bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A' - refBase = (byte)ref.getBase(); - prevBase = bases[offset-1]; + // skip if base quality is zero + if( quals[offset] > 0 ) { + bases = read.getReadBases(); // BUGBUG: DinucCovariate is relying on this method returning the same byte for bases 'a' and 'A' + refBase = (byte)ref.getBase(); + prevBase = bases[offset-1]; - // Get the complement base strand if we are a negative strand read - if( read.getReadNegativeStrandFlag() ) { - bases = BaseUtils.simpleComplement( bases ); // this is an expensive call - refBase = (byte)BaseUtils.simpleComplement( ref.getBase() ); - prevBase = bases[offset+1]; + // Get the complement base strand if we are a negative strand read + if( read.getReadNegativeStrandFlag() ) { + bases = BaseUtils.simpleComplement( bases ); // this is an expensive call + refBase = (byte)BaseUtils.simpleComplement( ref.getBase() ); + prevBase = bases[offset+1]; + } + + // skip if this base or the previous one was an 'N' or etc. + if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) { + + readGroup = read.getReadGroup().getReadGroupId(); // this is an expensive call + platform = read.getReadGroup().getPlatform(); // this is an expensive call + + // SOLID bams insert the reference base into the read if the color space quality is zero, so skip over them + colorSpaceQuals = null; + if( platform.equalsIgnoreCase("SOLID") ) { + colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG)); } - - // skip if this base or the previous one was an 'N' or etc. - if( BaseUtils.isRegularBase( (char)prevBase ) && BaseUtils.isRegularBase( (char)bases[offset] ) ) { - readGroup = read.getReadGroup().getReadGroupId(); // this is an expensive call - updateDataFromRead( read, offset, readGroup, quals, bases, refBase ); + if( colorSpaceQuals == null || colorSpaceQuals[offset] > 0 ) //BUGBUG: This isn't exactly correct yet + { + updateDataFromRead( read, offset, readGroup, platform, quals, bases, refBase ); } } } @@ -303,13 +323,14 @@ public class CovariateCounterWalker extends LocusWalker { * @param bases The bases which make up the read * @param refBase The reference base at this locus */ - private void updateDataFromRead(final SAMRecord read, final int offset, final String readGroup, final byte[] quals, final byte[] bases, final byte refBase) { + private void updateDataFromRead(final SAMRecord read, final int offset, final String readGroup, final String platform, + final byte[] quals, final byte[] bases, final byte refBase) { List key = new ArrayList(); // Loop through the list of requested covariates and pick out the value from the read, offset, and reference for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( read, offset, readGroup, quals, bases ) ); + key.add( covariate.getValue( read, offset, readGroup, platform, quals, bases ) ); } // Using the list of covariate values as a key, pick out the RecalDatum from the data HashMap diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java index e1965434b..475563ab6 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/CycleCovariate.java @@ -44,25 +44,18 @@ import net.sf.samtools.SAMRecord; public class CycleCovariate implements Covariate { - private String platform; - public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker - platform = "SLX"; // Solexa is the default } - public CycleCovariate(final String _platform) { - platform = _platform; - } - - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, final byte[] quals, final byte[] bases) { - if( platform.equalsIgnoreCase( "SLX" ) ) { - int cycle = offset; + if( platform.equalsIgnoreCase( "ILLUMINA" ) ) { + int cycle = offset; if( read.getReadNegativeStrandFlag() ) { cycle = bases.length - (offset + 1); } return cycle; - } else if( platform.equalsIgnoreCase( "454" ) ) { + } else if( platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454" int cycle = 0; byte prevBase = bases[0]; for( int iii = 1; iii <= offset; iii++ ) { @@ -76,7 +69,7 @@ public class CycleCovariate implements Covariate { // the ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf return offset / 5; // integer division } else { - throw new StingException( "Requested platform (" + platform + ") not supported in CycleCovariate." ); + throw new StingException( "Platform in read (" + platform + ") is not supported in CycleCovariate. Read = " + read ); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java index 02c912468..2d6e34abc 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/DinucCovariate.java @@ -56,7 +56,7 @@ public class DinucCovariate implements Covariate { } } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, final byte[] quals, final byte[] bases) { byte base = bases[offset]; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java index 287c23667..2b9830972 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MappingQualityCovariate.java @@ -40,7 +40,7 @@ public class MappingQualityCovariate implements Covariate { public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, final byte[] quals, final byte[] bases) { return read.getMappingQuality(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java index 616682c4a..f8e54d6d4 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/MinimumNQSCovariate.java @@ -48,7 +48,7 @@ public class MinimumNQSCovariate implements Covariate { windowReach = windowSize / 2; // integer division } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, final byte[] quals, final byte[] bases) { // Loop over the list of base quality scores in the window and find the minimum diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PositionCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PositionCovariate.java new file mode 100755 index 000000000..e7f691739 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PositionCovariate.java @@ -0,0 +1,64 @@ +package org.broadinstitute.sting.playground.gatk.walkers.Recalibration; + +import net.sf.samtools.SAMRecord; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Nov 18, 2009 + * + * The Position covariate. It is a lot like the Cycle covariate except it always returns the offset regardless of which platform the read came from. + * This is the Solexa definition of machine cycle and the covariate that was always being used in the original version of the recalibrator. + */ + +public class PositionCovariate implements Covariate { + + public PositionCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker + } + + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, + final byte[] quals, final byte[] bases) { + int cycle = offset; + if( read.getReadNegativeStrandFlag() ) { + cycle = bases.length - (offset + 1); + } + return cycle; + } + + public final Comparable getValue(final String str) { + return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing + } + + public final int estimatedNumberOfBins() { + return 100; + } + + public String toString() { + return "Position in Read"; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java index 058286537..5bb19a9b5 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/PrimerRoundCovariate.java @@ -41,26 +41,15 @@ import net.sf.samtools.SAMRecord; public class PrimerRoundCovariate implements Covariate { - private String platform; - - public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker - platform = "SLX"; + public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public PrimerRoundCovariate(final String _platform) { - platform = _platform; - } - - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, final byte[] quals, final byte[] bases) { - if( platform.equalsIgnoreCase( "SLX" ) ) { - return 1; // nothing to do here because it is always the same - } else if( platform.equalsIgnoreCase( "454" ) ) { - return 1; // nothing to do here because it is always the same - } else if( platform.equalsIgnoreCase( "SOLID" ) ) { - return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf + if( platform.equalsIgnoreCase( "SOLID" ) ) { + return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf } else { - throw new StingException( "Requested platform (" + platform + ") not supported in PrimerRoundCovariate." ); + return 1; // nothing to do here because it is always the same } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java index 02531c16d..437aa78da 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/QualityScoreCovariate.java @@ -40,7 +40,7 @@ public class QualityScoreCovariate implements Covariate { public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, final byte[] quals, final byte[] bases) { return (int)quals[offset]; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java index 4824b7078..92ab81a10 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/ReadGroupCovariate.java @@ -40,7 +40,7 @@ public class ReadGroupCovariate implements Covariate{ public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker } - public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, + public final Comparable getValue(final SAMRecord read, final int offset, final String readGroup, final String platform, final byte[] quals, final byte[] bases) { return readGroup; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java index 4e25bd98d..3d6bb4ea1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/Recalibration/TableRecalibrationWalker.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.*; @@ -69,8 +70,6 @@ public class TableRecalibrationWalker extends ReadWalker(); - // Read in the data from the csv file and populate the map logger.info( "Reading in the data from input file..." ); @@ -139,9 +148,7 @@ public class TableRecalibrationWalker extends ReadWalker key = new ArrayList(); for( Covariate covariate : requestedCovariates ) { - key.add( covariate.getValue( read, iii, readGroup, originalQuals, bases ) ); // offset is zero based so passing iii is correct here + key.add( covariate.getValue( read, iii, readGroup, platform, originalQuals, bases ) ); // offset is zero based so passing iii is correct here } recalQuals[iii] = performSequentialQualityCalculation( key ); @@ -275,8 +283,8 @@ public class TableRecalibrationWalker extends ReadWalker