Context covariate now operates as a highly compressed bitset

* All contexts containing 'N' bases are now collapsed into a single uninformative (null) context
* Each context is now represented internally as a BitSet but output as a DNA string
* Sorted output is temporarily disabled because context values can now be null
This commit is contained in:
Mauricio Carneiro 2012-02-29 18:56:11 -05:00
parent d379c3763a
commit 9e95b10789
6 changed files with 35 additions and 43 deletions

View File

@ -26,10 +26,12 @@
package org.broadinstitute.sting.gatk.walkers.bqsr; package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays; import java.util.Arrays;
import java.util.BitSet;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -43,10 +45,6 @@ public class ContextCovariate implements StandardCovariate {
private int insertionsContextSize; private int insertionsContextSize;
private int deletionsContextSize; private int deletionsContextSize;
private String mismatchesNoContext = "";
private String insertionsNoContext = "";
private String deletionsNoContext = "";
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override @Override
public void initialize(final RecalibrationArgumentCollection RAC) { public void initialize(final RecalibrationArgumentCollection RAC) {
@ -57,29 +55,26 @@ public class ContextCovariate implements StandardCovariate {
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0) if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize)); throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
// initialize no context strings given the size of the context for each covariate type
mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize);
insertionsNoContext = makeAllNStringWithLength(insertionsContextSize);
deletionsNoContext = makeAllNStringWithLength( deletionsContextSize);
} }
@Override @Override
public CovariateValues getValues(final GATKSAMRecord read) { public CovariateValues getValues(final GATKSAMRecord read) {
int l = read.getReadLength(); int l = read.getReadLength();
String[] mismatches = new String [l]; BitSet[] mismatches = new BitSet[l];
String[] insertions = new String [l]; BitSet[] insertions = new BitSet[l];
String[] deletions = new String [l]; BitSet[] deletions = new BitSet[l];
final boolean negativeStrand = read.getReadNegativeStrandFlag(); final boolean negativeStrand = read.getReadNegativeStrandFlag();
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();
if (negativeStrand) { if (negativeStrand)
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place bases = BaseUtils.simpleReverseComplement(bases);
}
for (int i = 0; i < read.getReadLength(); i++) { for (int i = 0; i < read.getReadLength(); i++) {
mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext); mismatches[i] = contextWith(bases, i, mismatchesContextSize);
insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext); insertions[i] = contextWith(bases, i, insertionsContextSize);
deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext); deletions[i] = contextWith(bases, i, deletionsContextSize);
} }
if (negativeStrand) { if (negativeStrand) {
reverse(mismatches); reverse(mismatches);
reverse(insertions); reverse(insertions);
@ -90,7 +85,7 @@ public class ContextCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration // Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override @Override
public final Comparable getValue(final String str) { public final Object getValue(final String str) {
return str; return str;
} }
@ -100,29 +95,28 @@ public class ContextCovariate implements StandardCovariate {
* @param bases the bases in the read to build the context from * @param bases the bases in the read to build the context from
* @param offset the position in the read to calculate the context for * @param offset the position in the read to calculate the context for
* @param contextSize context size to use building the context * @param contextSize context size to use building the context
* @param noContextString string to return if the position is not far enough in the read to have a full context before.
* @return * @return
*/ */
private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) { private BitSet contextWith(byte [] bases, int offset, int contextSize) {
return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset)); if (offset < contextSize)
return null;
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (context.contains("N"))
return null;
return MathUtils.bitSetFrom(context);
} }
private String makeAllNStringWithLength(int length) {
String s = "";
for (int i=0; i<length; i++)
s += "N";
return s;
}
/** /**
* Reverses the given array in place. * Reverses the given array in place.
* *
* @param array any array * @param array any array
*/ */
private static void reverse(final Comparable[] array) { private static void reverse(final Object[] array) {
final int arrayLength = array.length; final int arrayLength = array.length;
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) { for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
final Comparable temp = array[l]; final Object temp = array[l];
array[l] = array[r]; array[l] = array[r];
array[r] = temp; array[r] = temp;
} }

View File

@ -53,7 +53,7 @@ public interface Covariate {
*/ */
public CovariateValues getValues(GATKSAMRecord read); public CovariateValues getValues(GATKSAMRecord read);
public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
} }
interface RequiredCovariate extends Covariate {} interface RequiredCovariate extends Covariate {}

View File

@ -12,25 +12,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
* @since 2/8/12 * @since 2/8/12
*/ */
public class CovariateValues { public class CovariateValues {
private Comparable[] mismatches; private Object[] mismatches;
private Comparable[] insertions; private Object[] insertions;
private Comparable[] deletions; private Object[] deletions;
public CovariateValues(Comparable[] mismatch, Comparable[] insertion, Comparable[] deletion) { public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
this.mismatches = mismatch; this.mismatches = mismatch;
this.insertions = insertion; this.insertions = insertion;
this.deletions = deletion; this.deletions = deletion;
} }
public Comparable[] getMismatches() { public Object[] getMismatches() {
return mismatches; return mismatches;
} }
public Comparable[] getInsertions() { public Object[] getInsertions() {
return insertions; return insertions;
} }
public Comparable[] getDeletions() { public Object[] getDeletions() {
return deletions; return deletions;
} }

View File

@ -198,7 +198,7 @@ public class CycleCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration // Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override @Override
public final Comparable getValue(final String str) { public final Object getValue(final String str) {
return Integer.parseInt(str); return Integer.parseInt(str);
} }
} }

View File

@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
* *
@ -67,7 +65,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration // Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override @Override
public final Comparable getValue(final String str) { public final Object getValue(final String str) {
return Integer.parseInt(str); return Integer.parseInt(str);
} }
} }

View File

@ -55,7 +55,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration // Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override @Override
public final Comparable getValue(final String str) { public final Object getValue(final String str) {
return str; return str;
} }
} }