Context covariate now operates as a highly compressed bitset
* All contexts with 'N' bases are now collapsed as uninformative * Context size is now represented internally as a BitSet but output as a dna string * Temporarily disabled sorted outputs because of null objects
This commit is contained in:
parent
d379c3763a
commit
9e95b10789
|
|
@ -26,10 +26,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -43,10 +45,6 @@ public class ContextCovariate implements StandardCovariate {
|
|||
private int insertionsContextSize;
|
||||
private int deletionsContextSize;
|
||||
|
||||
private String mismatchesNoContext = "";
|
||||
private String insertionsNoContext = "";
|
||||
private String deletionsNoContext = "";
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
|
|
@ -57,29 +55,26 @@ public class ContextCovariate implements StandardCovariate {
|
|||
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
|
||||
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
|
||||
|
||||
// initialize no context strings given the size of the context for each covariate type
|
||||
mismatchesNoContext = makeAllNStringWithLength(mismatchesContextSize);
|
||||
insertionsNoContext = makeAllNStringWithLength(insertionsContextSize);
|
||||
deletionsNoContext = makeAllNStringWithLength( deletionsContextSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CovariateValues getValues(final GATKSAMRecord read) {
|
||||
int l = read.getReadLength();
|
||||
String[] mismatches = new String [l];
|
||||
String[] insertions = new String [l];
|
||||
String[] deletions = new String [l];
|
||||
BitSet[] mismatches = new BitSet[l];
|
||||
BitSet[] insertions = new BitSet[l];
|
||||
BitSet[] deletions = new BitSet[l];
|
||||
|
||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||
byte[] bases = read.getReadBases();
|
||||
if (negativeStrand) {
|
||||
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
|
||||
}
|
||||
if (negativeStrand)
|
||||
bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
for (int i = 0; i < read.getReadLength(); i++) {
|
||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize, mismatchesNoContext);
|
||||
insertions[i] = contextWith(bases, i, insertionsContextSize, insertionsNoContext);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize, deletionsNoContext);
|
||||
mismatches[i] = contextWith(bases, i, mismatchesContextSize);
|
||||
insertions[i] = contextWith(bases, i, insertionsContextSize);
|
||||
deletions[i] = contextWith(bases, i, deletionsContextSize);
|
||||
}
|
||||
|
||||
if (negativeStrand) {
|
||||
reverse(mismatches);
|
||||
reverse(insertions);
|
||||
|
|
@ -90,7 +85,7 @@ public class ContextCovariate implements StandardCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
|
|
@ -100,29 +95,28 @@ public class ContextCovariate implements StandardCovariate {
|
|||
* @param bases the bases in the read to build the context from
|
||||
* @param offset the position in the read to calculate the context for
|
||||
* @param contextSize context size to use building the context
|
||||
* @param noContextString string to return if the position is not far enough in the read to have a full context before.
|
||||
* @return
|
||||
*/
|
||||
private String contextWith(byte [] bases, int offset, int contextSize, String noContextString) {
|
||||
return (offset < contextSize) ? noContextString : new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
||||
private BitSet contextWith(byte [] bases, int offset, int contextSize) {
|
||||
if (offset < contextSize)
|
||||
return null;
|
||||
|
||||
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
|
||||
if (context.contains("N"))
|
||||
return null;
|
||||
|
||||
return MathUtils.bitSetFrom(context);
|
||||
}
|
||||
|
||||
private String makeAllNStringWithLength(int length) {
|
||||
String s = "";
|
||||
for (int i=0; i<length; i++)
|
||||
s += "N";
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverses the given array in place.
|
||||
*
|
||||
* @param array any array
|
||||
*/
|
||||
private static void reverse(final Comparable[] array) {
|
||||
private static void reverse(final Object[] array) {
|
||||
final int arrayLength = array.length;
|
||||
for (int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||
final Comparable temp = array[l];
|
||||
final Object temp = array[l];
|
||||
array[l] = array[r];
|
||||
array[r] = temp;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ public interface Covariate {
|
|||
*/
|
||||
public CovariateValues getValues(GATKSAMRecord read);
|
||||
|
||||
public Comparable getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
public Object getValue(String str); // Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {}
|
||||
|
|
|
|||
|
|
@ -12,25 +12,25 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
* @since 2/8/12
|
||||
*/
|
||||
public class CovariateValues {
|
||||
private Comparable[] mismatches;
|
||||
private Comparable[] insertions;
|
||||
private Comparable[] deletions;
|
||||
private Object[] mismatches;
|
||||
private Object[] insertions;
|
||||
private Object[] deletions;
|
||||
|
||||
public CovariateValues(Comparable[] mismatch, Comparable[] insertion, Comparable[] deletion) {
|
||||
public CovariateValues(Object[] mismatch, Object[] insertion, Object[] deletion) {
|
||||
this.mismatches = mismatch;
|
||||
this.insertions = insertion;
|
||||
this.deletions = deletion;
|
||||
}
|
||||
|
||||
public Comparable[] getMismatches() {
|
||||
public Object[] getMismatches() {
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
public Comparable[] getInsertions() {
|
||||
public Object[] getInsertions() {
|
||||
return insertions;
|
||||
}
|
||||
|
||||
public Comparable[] getDeletions() {
|
||||
public Object[] getDeletions() {
|
||||
return deletions;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
}
|
||||
|
|
@ -2,8 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -67,7 +65,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Comparable getValue(final String str) {
|
||||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue