From aa86f3710dfd76cd35a355afcfd240ce6b6c2622 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Thu, 17 Dec 2009 18:25:09 +0000 Subject: [PATCH] Updating HomopolymerCovariate to only count the consecutive previous bases. I left in the code but commented out for if somebody wants to worry about carry forward homopolymer problems. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2391 348d0f76-0448-11de-a6fe-93d51630548a --- .../recalibration/HomopolymerCovariate.java | 34 +++++++++++++++---- .../RecalibrationArgumentCollection.java | 2 +- packages/GenomeAnalysisTK.xml | 2 +- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java index 0e9fb9306..e9745b6a3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java @@ -32,15 +32,15 @@ import net.sf.samtools.SAMRecord; * User: rpoplin * Date: Dec 4, 2009 * - * The Homopolymer Run Covariate. This is the number of bases in the previous N that match the current base. + * The Homopolymer Run Covariate. This is the number of consecutive bases in the previous N that match the current base. * For example, if N = 10: - * ATTGCCCCGTAAAAAAAAAA - * 00100123121123456789 + * ATTGCCCCGTAAAAAAAAATA + * 001001230001234567800 */ public class HomopolymerCovariate implements ExperimentalCovariate { - int numBack = 8; + int numBack = 7; // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { @@ -50,6 +50,10 @@ public class HomopolymerCovariate implements ExperimentalCovariate { // Used to pick out the covariate's value from attributes of the read public final Comparable getValue( final SAMRecord read, final int offset ) { + // This block of code is for if you don't want to only count consecutive bases + // ATTGCCCCGTAAAAAAAAATA + // 001001231211234567819 + /* int numAgree = 0; // The number of bases that agree with you in the previous numBack bases of the read int startPos = 0; int stopPos = 0; @@ -57,15 +61,31 @@ public class HomopolymerCovariate implements ExperimentalCovariate { byte thisBase = bases[offset]; if( !read.getReadNegativeStrandFlag() ) { // Forward direction startPos = Math.max(offset - numBack, 0); - stopPos = offset; + stopPos = Math.max(offset - 1, 0); } else { // Negative direction - startPos = offset + 1; - stopPos = Math.min(offset + numBack + 1, read.getReadLength()); + startPos = Math.min(offset + 2, bases.length); + stopPos = Math.min(offset + numBack + 1, bases.length); } for( int iii = startPos; iii < stopPos; iii++ ) { if( bases[iii] == thisBase ) { numAgree++; } } + */ + + int numAgree = 0; // The number of consecutive bases that agree with you in the previous numBack bases of the read + byte[] bases = read.getReadBases(); + int iii = offset; + if( !read.getReadNegativeStrandFlag() ) { // Forward direction + while( iii <= bases.length-2 && bases[iii] == bases[iii+1] && numAgree < numBack ) { + numAgree++; + iii++; + } + } else { // Negative direction + while( iii >= 1 && bases[iii] == bases[iii-1] && numAgree < numBack ) { + numAgree++; + iii--; + } + } return numAgree; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index 7a5c9cdc6..9d904e5fc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -58,7 +58,7 @@ public class RecalibrationArgumentCollection { @Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false) public int WINDOW_SIZE = 5; @Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false) - public int HOMOPOLYMER_NBACK = 8; + public int HOMOPOLYMER_NBACK = 7; public boolean checkSolidRecalMode() { return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") || diff --git a/packages/GenomeAnalysisTK.xml b/packages/GenomeAnalysisTK.xml index acffff24c..33f9f5e95 100644 --- a/packages/GenomeAnalysisTK.xml +++ b/packages/GenomeAnalysisTK.xml @@ -24,7 +24,7 @@ org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate - org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate + org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker