From dc05b71e39b1e0124a5954a9c561d4556269117d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 6 Feb 2012 11:10:24 -0500 Subject: [PATCH] Updating Covariate interface with Mauricio to include an errorModel parameter. On the fly recalibration of base insertion and base deletion quals is live for the HaplotypeCaller --- .../walkers/recalibration/ContextCovariate.java | 5 +++-- .../recalibration/CountCovariatesWalker.java | 3 ++- .../gatk/walkers/recalibration/Covariate.java | 10 ++++++---- .../walkers/recalibration/CycleCovariate.java | 3 ++- .../walkers/recalibration/DinucCovariate.java | 11 ++++++----- .../recalibration/GCContentCovariate.java | 3 ++- .../recalibration/HomopolymerCovariate.java | 3 ++- .../recalibration/MappingQualityCovariate.java | 3 ++- .../recalibration/MinimumNQSCovariate.java | 3 ++- .../walkers/recalibration/PositionCovariate.java | 3 ++- .../recalibration/PrimerRoundCovariate.java | 3 ++- .../recalibration/QualityScoreCovariate.java | 16 ++++++++++++---- .../recalibration/ReadGroupCovariate.java | 5 +++-- .../walkers/recalibration/RecalDataManager.java | 5 +++-- .../recalibration/TableRecalibrationWalker.java | 3 ++- .../utils/recalibration/BaseRecalibration.java | 4 ++-- .../sting/utils/sam/GATKSAMRecord.java | 5 +++-- 17 files changed, 56 insertions(+), 32 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java index 8b8f2cee9..0edd5d03b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ContextCovariate.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import java.util.Arrays; @@ -35,7 +36,7 @@ import java.util.Arrays; * Date: 9/26/11 */ -public class ContextCovariate implements Covariate { +public class ContextCovariate implements ExperimentalCovariate { final int CONTEXT_SIZE = 8; String allN = ""; @@ -49,7 +50,7 @@ public class ContextCovariate implements Covariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { byte[] bases = read.getReadBases(); for(int i = 0; i < read.getReadLength(); i++) { comparable[i] = ( i-CONTEXT_SIZE < 0 ? allN : new String(Arrays.copyOfRange(bases,i-CONTEXT_SIZE,i)) ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index f6f05d39c..4e3d4048b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; @@ -374,7 +375,7 @@ public class CountCovariatesWalker extends LocusWalker dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap final int readLength = read.getReadLength(); final boolean negativeStrand = read.getReadNegativeStrandFlag(); @@ -78,7 +79,7 @@ public class DinucCovariate implements StandardCovariate { if(negativeStrand) { bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place } - result[0] = NO_DINUC; // No dinuc at the beginning of the read + comparable[0] = NO_DINUC; // No dinuc at the beginning of the read prevBase = bases[0]; offset++; @@ -87,16 +88,16 @@ public class DinucCovariate implements StandardCovariate { // previous base in the reference. This is done in part to be consistent with unmapped reads. base = bases[offset]; if( BaseUtils.isRegularBase( prevBase ) ) { - result[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) ); + comparable[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) ); } else { - result[offset] = NO_DINUC; + comparable[offset] = NO_DINUC; } offset++; prevBase = base; } if(negativeStrand) { - reverse( result ); + reverse( comparable ); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java index 1a085d5c0..e4ff415fe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/GCContentCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; /* * Copyright (c) 2010 The Broad Institute @@ -78,7 +79,7 @@ public class GCContentCovariate implements ExperimentalCovariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { for(int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java index a54f9597b..24cb98a8d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; /* * Copyright (c) 2009 The Broad Institute @@ -92,7 +93,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { for(int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java index ad6484428..ec5b357a4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MappingQualityCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; /* * Copyright (c) 2009 The Broad Institute @@ -54,7 +55,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { for(int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java index 0c1c66a5f..21fd14e0c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/MinimumNQSCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; /* * Copyright (c) 2009 The Broad Institute @@ -63,7 +64,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { for(int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java index 2a4497b0d..5c410ce5f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PositionCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; /* * Copyright (c) 2009 The Broad Institute @@ -53,7 +54,7 @@ public class PositionCovariate implements ExperimentalCovariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { for(int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java index 4a9629234..e6aa44226 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/PrimerRoundCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; /* * Copyright (c) 2009 The Broad Institute @@ -59,7 +60,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { for(int iii = 0; iii < read.getReadLength(); iii++) { comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java index de6d5065b..f85b52350 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/QualityScoreCovariate.java @@ -1,6 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; + +import java.util.Arrays; /* * Copyright (c) 2009 The Broad Institute @@ -43,10 +46,15 @@ public class QualityScoreCovariate implements RequiredCovariate { } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { - byte[] baseQualities = read.getBaseQualities(); - for(int i = 0; i < read.getReadLength(); i++) { - comparable[i] = (int) baseQualities[i]; + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { + if( modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ) { + byte[] baseQualities = read.getBaseQualities(); + for(int i = 0; i < read.getReadLength(); i++) { + comparable[i] = (int) baseQualities[i]; + } + } else { // model == BASE_INSERTION || model == BASE_DELETION + Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will + // be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45 } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java index cb108feb8..e27077128 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/ReadGroupCovariate.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; /* * Copyright (c) 2009 The Broad Institute @@ -35,7 +36,7 @@ import net.sf.samtools.SAMRecord; * The Read Group covariate. */ -public class ReadGroupCovariate implements RequiredCovariate{ +public class ReadGroupCovariate implements RequiredCovariate { public static final String defaultReadGroup = "DefaultReadGroup"; @@ -45,7 +46,7 @@ public class ReadGroupCovariate implements RequiredCovariate{ } @Override - public void getValues(SAMRecord read, Comparable[] comparable) { + public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) { final String readGroupId = read.getReadGroup().getReadGroupId(); for(int i = 0; i < read.getReadLength(); i++) { comparable[i] = readGroupId; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 66ad1fb9c..be02063de 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -571,7 +572,7 @@ public class RecalDataManager { * value for the ith position in the read and the jth covariate in * reqeustedCovariates list. */ - public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List requestedCovariates) { + public static Comparable[][] computeCovariates( final GATKSAMRecord gatkRead, final List requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType ) { //compute all covariates for this read final List requestedCovariatesRef = requestedCovariates; final int numRequestedCovariates = requestedCovariatesRef.size(); @@ -582,7 +583,7 @@ public class RecalDataManager { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read for( int i = 0; i < numRequestedCovariates; i++ ) { - requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder ); + requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder, modelType ); for(int j = 0; j < readLength; j++) { //copy values into a 2D array that allows all covar types to be extracted at once for //an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index a569aefd2..a8006d506 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.recalibration.BaseRecalibration; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.XReadLines; @@ -398,7 +399,7 @@ public class TableRecalibrationWalker extends ReadWalker