diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
index 0e9fb9306..e9745b6a3 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/HomopolymerCovariate.java
@@ -32,15 +32,15 @@ import net.sf.samtools.SAMRecord;
* User: rpoplin
* Date: Dec 4, 2009
*
- * The Homopolymer Run Covariate. This is the number of bases in the previous N that match the current base.
+ * The Homopolymer Run Covariate. This is the number of consecutive bases in the previous N that match the current base.
* For example, if N = 10:
- * ATTGCCCCGTAAAAAAAAAA
- * 00100123121123456789
+ * ATTGCCCCGTAAAAAAAAATA
+ * 001001230001234567800
*/
public class HomopolymerCovariate implements ExperimentalCovariate {
- int numBack = 8;
+ int numBack = 7;
// Initialize any member variables using the command-line arguments passed to the walkers
public void initialize( final RecalibrationArgumentCollection RAC ) {
@@ -50,6 +50,10 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
+ // This block of code is for if you don't want to only count consecutive bases
+ // ATTGCCCCGTAAAAAAAAATA
+ // 001001231211234567819
+ /*
int numAgree = 0; // The number of bases that agree with you in the previous numBack bases of the read
int startPos = 0;
int stopPos = 0;
@@ -57,15 +61,31 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
byte thisBase = bases[offset];
if( !read.getReadNegativeStrandFlag() ) { // Forward direction
startPos = Math.max(offset - numBack, 0);
- stopPos = offset;
+ stopPos = Math.max(offset - 1, 0);
} else { // Negative direction
- startPos = offset + 1;
- stopPos = Math.min(offset + numBack + 1, read.getReadLength());
+ startPos = Math.min(offset + 2, bases.length);
+ stopPos = Math.min(offset + numBack + 1, bases.length);
}
for( int iii = startPos; iii < stopPos; iii++ ) {
if( bases[iii] == thisBase ) { numAgree++; }
}
+ */
+
+ int numAgree = 0; // The number of consecutive bases that agree with you in the previous numBack bases of the read
+ byte[] bases = read.getReadBases();
+ int iii = offset;
+ if( !read.getReadNegativeStrandFlag() ) { // Forward direction
+ while( iii <= bases.length-2 && bases[iii] == bases[iii+1] && numAgree < numBack ) {
+ numAgree++;
+ iii++;
+ }
+ } else { // Negative direction
+ while( iii >= 1 && bases[iii] == bases[iii-1] && numAgree < numBack ) {
+ numAgree++;
+ iii--;
+ }
+ }
return numAgree;
}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
index 7a5c9cdc6..9d904e5fc 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java
@@ -58,7 +58,7 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false)
public int WINDOW_SIZE = 5;
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
- public int HOMOPOLYMER_NBACK = 8;
+ public int HOMOPOLYMER_NBACK = 7;
public boolean checkSolidRecalMode() {
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
diff --git a/packages/GenomeAnalysisTK.xml b/packages/GenomeAnalysisTK.xml
index acffff24c..33f9f5e95 100644
--- a/packages/GenomeAnalysisTK.xml
+++ b/packages/GenomeAnalysisTK.xml
@@ -24,7 +24,7 @@
org.broadinstitute.sting.gatk.walkers.recalibration.PrimerRoundCovariate
org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate
org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate
- org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate
+ org.broadinstitute.sting.gatk.walkers.recalibration.HomopolymerCovariate
org.broadinstitute.sting.gatk.walkers.indels.CleanedReadInjector
org.broadinstitute.sting.gatk.walkers.indels.IndelIntervalWalker