-move final vars to command-line args
-Per Andrey: ignore indels from aligner when testing against alt consensus git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@966 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ad80894afa
commit
5451bbfd5a
|
|
@ -30,9 +30,12 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
public String OUT_STATS = null;
|
public String OUT_STATS = null;
|
||||||
@Argument(fullName="LODThresholdForCleaning", shortName="LOD", doc="LOD threshold above which the cleaner will clean", required=false)
|
@Argument(fullName="LODThresholdForCleaning", shortName="LOD", doc="LOD threshold above which the cleaner will clean", required=false)
|
||||||
public double LOD_THRESHOLD = 5.0;
|
public double LOD_THRESHOLD = 5.0;
|
||||||
|
@Argument(fullName="EntropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
|
||||||
|
public double MISMATCH_THRESHOLD = 0.25;
|
||||||
|
@Argument(fullName="GreedyThreshold", shortName="greedy", doc="coverage above which the cleaner turns on greedy mode to improve performance", required=false)
|
||||||
|
public int GREEDY_THRESHOLD = 500;
|
||||||
|
|
||||||
public static final int MAX_QUAL = 99;
|
public static final int MAX_QUAL = 99;
|
||||||
private static final double MISMATCH_THRESHOLD = 0.25;
|
|
||||||
|
|
||||||
private SAMFileWriter writer;
|
private SAMFileWriter writer;
|
||||||
private FileWriter indelOutput = null;
|
private FileWriter indelOutput = null;
|
||||||
|
|
@ -46,6 +49,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
|
|
||||||
if ( LOD_THRESHOLD < 0.0 )
|
if ( LOD_THRESHOLD < 0.0 )
|
||||||
throw new RuntimeException("LOD threshold cannot be a negative number");
|
throw new RuntimeException("LOD threshold cannot be a negative number");
|
||||||
|
if ( MISMATCH_THRESHOLD <= 0.0 || MISMATCH_THRESHOLD > 1.0 )
|
||||||
|
throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1");
|
||||||
|
|
||||||
SAMFileHeader header = getToolkit().getEngine().getSAMHeader();
|
SAMFileHeader header = getToolkit().getEngine().getSAMHeader();
|
||||||
writer = Utils.createSAMFileWriterWithCompression(header, false, OUT, getToolkit().getBAMCompression());
|
writer = Utils.createSAMFileWriterWithCompression(header, false, OUT, getToolkit().getBAMCompression());
|
||||||
|
|
@ -144,6 +149,19 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static int mismatchQualitySumIgnoreCigar(AlignedRead aRead, String refSeq, int refIndex) {
|
||||||
|
String readSeq = aRead.getReadString();
|
||||||
|
String quals = aRead.getBaseQualityString();
|
||||||
|
int sum = 0;
|
||||||
|
for (int readIndex = 0 ; readIndex < readSeq.length() ; refIndex++, readIndex++ ) {
|
||||||
|
if ( refIndex >= refSeq.length() )
|
||||||
|
sum += MAX_QUAL;
|
||||||
|
else if ( Character.toUpperCase(readSeq.charAt(readIndex)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
|
||||||
|
sum += (int)quals.charAt(readIndex) - 33;
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex) {
|
private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex) {
|
||||||
String readSeq = aRead.getReadString();
|
String readSeq = aRead.getReadString();
|
||||||
String quals = aRead.getBaseQualityString();
|
String quals = aRead.getBaseQualityString();
|
||||||
|
|
@ -168,8 +186,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
refIndex += ce.getLength();
|
refIndex += ce.getLength();
|
||||||
break;
|
break;
|
||||||
case S: // soft clip
|
case S: // soft clip
|
||||||
refIndex+=ce.getLength(); // (?? - do we have to??);
|
refIndex+=ce.getLength(); // (?? - do we have to??);
|
||||||
readIndex+=ce.getLength();
|
readIndex+=ce.getLength();
|
||||||
break;
|
break;
|
||||||
default: throw new StingException("Cigar element "+ce.getOperator() +" currently can not be processed");
|
default: throw new StingException("Cigar element "+ce.getOperator() +" currently can not be processed");
|
||||||
}
|
}
|
||||||
|
|
@ -197,7 +215,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
// decide which reads potentially need to be cleaned
|
// decide which reads potentially need to be cleaned
|
||||||
for ( SAMRecord read : reads ) {
|
for ( SAMRecord read : reads ) {
|
||||||
|
|
||||||
|
|
||||||
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||||
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
||||||
if ( numBlocks == 2 )
|
if ( numBlocks == 2 )
|
||||||
|
|
@ -207,7 +224,10 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex);
|
int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex);
|
||||||
|
|
||||||
// we currently can not deal with clipped reads correctly
|
// we currently can not deal with clipped reads correctly
|
||||||
if ( readIsClipped(read) ) { refReads.add(read); continue; }
|
if ( readIsClipped(read) ) {
|
||||||
|
refReads.add(read);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// if this doesn't match perfectly to the reference, let's try to clean it
|
// if this doesn't match perfectly to the reference, let's try to clean it
|
||||||
if ( mismatchScore > 0 ) {
|
if ( mismatchScore > 0 ) {
|
||||||
|
|
@ -410,13 +430,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
|
|
||||||
private Pair<Integer, Integer> findBestOffset(String ref, AlignedRead read) {
|
private Pair<Integer, Integer> findBestOffset(String ref, AlignedRead read) {
|
||||||
int attempts = ref.length() - read.getReadLength() + 1;
|
int attempts = ref.length() - read.getReadLength() + 1;
|
||||||
int bestScore = mismatchQualitySum(read, ref, 0);
|
int bestScore = mismatchQualitySumIgnoreCigar(read, ref, 0);
|
||||||
int bestIndex = 0;
|
int bestIndex = 0;
|
||||||
for ( int i = 1; i < attempts; i++ ) {
|
for ( int i = 1; i < attempts; i++ ) {
|
||||||
// we can't get better than 0!
|
// we can't get better than 0!
|
||||||
if ( bestScore == 0 )
|
if ( bestScore == 0 )
|
||||||
return new Pair<Integer, Integer>(bestIndex, 0);
|
return new Pair<Integer, Integer>(bestIndex, 0);
|
||||||
int score = mismatchQualitySum(read, ref, i);
|
int score = mismatchQualitySumIgnoreCigar(read, ref, i);
|
||||||
if ( score < bestScore ) {
|
if ( score < bestScore ) {
|
||||||
bestScore = score;
|
bestScore = score;
|
||||||
bestIndex = i;
|
bestIndex = i;
|
||||||
|
|
@ -584,8 +604,9 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
* @param cigar structure of the original alignment
|
* @param cigar structure of the original alignment
|
||||||
* @param refSeq reference sequence the read is aligned to
|
* @param refSeq reference sequence the read is aligned to
|
||||||
* @param readSeq read sequence
|
* @param readSeq read sequence
|
||||||
* @param refIndex 0-based alignment start position
|
* @param refIndex 0-based alignment start position on ref
|
||||||
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
* @param readIndex 0-based alignment start position on read
|
||||||
|
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
|
||||||
*/
|
*/
|
||||||
private Cigar indelRealignment(Cigar cigar, String refSeq, String readSeq, int refIndex, int readIndex) {
|
private Cigar indelRealignment(Cigar cigar, String refSeq, String readSeq, int refIndex, int readIndex) {
|
||||||
if ( cigar.numCigarElements() < 2 ) return cigar; // no indels, nothing to do
|
if ( cigar.numCigarElements() < 2 ) return cigar; // no indels, nothing to do
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue