Revert "ReduceReads ref bases are now output as '='"
Reducing the reference bases to '=' results in an extra compression of 13% on average. The GATK is not ready to handle files with '=' bases, and the decision was to implement this as engine support, not as a part of ReduceReads.
This commit is contained in:
parent
76c32f5409
commit
17cc484dbd
|
|
@ -198,13 +198,15 @@ public class AlignmentUtils {
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the mismatches in the read within the given reference context.
|
/** Returns the number of mismatches in the pileup element within the given reference context.
|
||||||
*
|
*
|
||||||
* @param read the SAMRecord
|
* @param read the SAMRecord
|
||||||
* @param ref the reference context
|
* @param ref the reference context
|
||||||
* @return each base is represented by a bit in the BitSet. True for mismatch, false for ref
|
* @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good
|
||||||
|
* @param windowSize window size (on each side) to test
|
||||||
|
* @return a bitset representing which bases are good
|
||||||
*/
|
*/
|
||||||
public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref) {
|
public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) {
|
||||||
// first determine the positions with mismatches
|
// first determine the positions with mismatches
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
BitSet mismatches = new BitSet(readLength);
|
BitSet mismatches = new BitSet(readLength);
|
||||||
|
|
@ -212,7 +214,7 @@ public class AlignmentUtils {
|
||||||
// it's possible we aren't starting at the beginning of a read,
|
// it's possible we aren't starting at the beginning of a read,
|
||||||
// and we don't need to look at any of the previous context outside our window
|
// and we don't need to look at any of the previous context outside our window
|
||||||
// (although we do need future context)
|
// (although we do need future context)
|
||||||
int readStartPos = read.getAlignmentStart();
|
int readStartPos = Math.max(read.getAlignmentStart(), ref.getLocus().getStart() - windowSize);
|
||||||
int currentReadPos = read.getAlignmentStart();
|
int currentReadPos = read.getAlignmentStart();
|
||||||
|
|
||||||
byte[] refBases = ref.getBases();
|
byte[] refBases = ref.getBases();
|
||||||
|
|
@ -263,22 +265,9 @@ public class AlignmentUtils {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return mismatches;
|
|
||||||
}
|
|
||||||
/** Returns a BitSet showing what bases are good according to the criteria of maxMismatches in the window
|
|
||||||
*
|
|
||||||
* @param read the SAMRecord
|
|
||||||
* @param ref the reference context
|
|
||||||
* @param maxMismatches the maximum number of surrounding mismatches we tolerate to consider a base good
|
|
||||||
* @param windowSize window size (on each side) to test
|
|
||||||
* @return a bitset representing which bases are good
|
|
||||||
*/
|
|
||||||
public static BitSet mismatchesInRefWindow(SAMRecord read, ReferenceContext ref, int maxMismatches, int windowSize) {
|
|
||||||
int readLength = read.getReadLength();
|
|
||||||
|
|
||||||
// all bits are set to false by default
|
// all bits are set to false by default
|
||||||
BitSet result = new BitSet(readLength);
|
BitSet result = new BitSet(readLength);
|
||||||
BitSet mismatches = mismatchesInRefWindow(read, ref);
|
|
||||||
|
|
||||||
int currentPos = 0, leftPos = 0, rightPos;
|
int currentPos = 0, leftPos = 0, rightPos;
|
||||||
int mismatchCount = 0;
|
int mismatchCount = 0;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue