diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java index 89a30e4f5..a1ab73341 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java @@ -26,10 +26,12 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; +import java.util.BitSet; /** * Created by IntelliJ IDEA. @@ -43,10 +45,6 @@ public class ContextCovariate implements StandardCovariate { private int insertionsContextSize; private int deletionsContextSize; - private String mismatchesNoContext = ""; - private String insertionsNoContext = ""; - private String deletionsNoContext = ""; - // Initialize any member variables using the command-line arguments passed to the walkers @Override public void initialize(final RecalibrationArgumentCollection RAC) { @@ -57,29 +55,26 @@ public class ContextCovariate implements StandardCovariate { if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0) throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize)); - // initialize no context strings given the size of the context for each covariate type - mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize); - insertionsNoContext = makeAllNStringWithLength(insertionsContextSize); - deletionsNoContext = makeAllNStringWithLength( deletionsContextSize); } @Override public CovariateValues getValues(final GATKSAMRecord read) { int l = read.getReadLength(); - String[] mismatches = new String [l]; - String[] insertions = new String [l]; - String[] deletions = new String [l]; + BitSet[] mismatches = new BitSet[l]; + BitSet[] insertions = new BitSet[l]; + BitSet[] deletions = new BitSet[l]; final boolean negativeStrand = read.getReadNegativeStrandFlag(); byte[] bases = read.getReadBases(); - if (negativeStrand) { - bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place - } + if (negativeStrand) + bases = BaseUtils.simpleReverseComplement(bases); + for (int i = 0; i < read.getReadLength(); i++) { - mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext); - insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext); - deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext); + mismatches[i] = contextWith(bases, i, mismatchesContextSize); + insertions[i] = contextWith(bases, i, insertionsContextSize); + deletions[i] = contextWith(bases, i, deletionsContextSize); } + if (negativeStrand) { reverse(mismatches); reverse(insertions); @@ -90,7 +85,7 @@ public class ContextCovariate implements StandardCovariate { // Used to get the covariate's value from input csv file during on-the-fly recalibration @Override - public final Comparable getValue(final String str) { + public final Object getValue(final String str) { return str; } @@ -100,29 +95,28 @@ public class ContextCovariate implements StandardCovariate { * @param bases the bases in the read to build the context from * @param offset the position in the read to calculate the context for * @param contextSize context size to use building the context - * @param noContextString string to return if the position is not far enough in the read to have a full context before. * @return */ - private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) { - return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset)); + private BitSet contextWith(byte [] bases, int offset, int contextSize) { + if (offset < contextSize) + return null; + + String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset)); + if (context.contains("N")) + return null; + + return MathUtils.bitSetFrom(context); } - - private String makeAllNStringWithLength(int length) { - String s = ""; - for (int i=0; i