-move final vars to command-line args

-Per Andrey: ignore indels from aligner when testing against alt consensus


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@966 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-06-10 16:39:00 +00:00
parent ad80894afa
commit 5451bbfd5a
1 changed files with 30 additions and 9 deletions

View File

@ -30,9 +30,12 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
public String OUT_STATS = null;
@Argument(fullName="LODThresholdForCleaning", shortName="LOD", doc="LOD threshold above which the cleaner will clean", required=false)
public double LOD_THRESHOLD = 5.0;
@Argument(fullName="EntropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
public double MISMATCH_THRESHOLD = 0.25;
@Argument(fullName="GreedyThreshold", shortName="greedy", doc="coverage above which the cleaner turns on greedy mode to improve performance", required=false)
public int GREEDY_THRESHOLD = 500;
public static final int MAX_QUAL = 99;
private static final double MISMATCH_THRESHOLD = 0.25;
private SAMFileWriter writer;
private FileWriter indelOutput = null;
@ -46,6 +49,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
if ( LOD_THRESHOLD < 0.0 )
throw new RuntimeException("LOD threshold cannot be a negative number");
if ( MISMATCH_THRESHOLD <= 0.0 || MISMATCH_THRESHOLD > 1.0 )
throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1");
SAMFileHeader header = getToolkit().getEngine().getSAMHeader();
writer = Utils.createSAMFileWriterWithCompression(header, false, OUT, getToolkit().getBAMCompression());
@ -144,6 +149,19 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
}
private static int mismatchQualitySumIgnoreCigar(AlignedRead aRead, String refSeq, int refIndex) {
String readSeq = aRead.getReadString();
String quals = aRead.getBaseQualityString();
int sum = 0;
for (int readIndex = 0 ; readIndex < readSeq.length() ; refIndex++, readIndex++ ) {
if ( refIndex >= refSeq.length() )
sum += MAX_QUAL;
else if ( Character.toUpperCase(readSeq.charAt(readIndex)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
sum += (int)quals.charAt(readIndex) - 33;
}
return sum;
}
private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex) {
String readSeq = aRead.getReadString();
String quals = aRead.getBaseQualityString();
@ -168,8 +186,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
refIndex += ce.getLength();
break;
case S: // soft clip
refIndex+=ce.getLength(); // (?? - do we have to??);
readIndex+=ce.getLength();
refIndex+=ce.getLength(); // (?? - do we have to??);
readIndex+=ce.getLength();
break;
default: throw new StingException("Cigar element "+ce.getOperator() +" currently can not be processed");
}
@ -197,7 +215,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// decide which reads potentially need to be cleaned
for ( SAMRecord read : reads ) {
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
if ( numBlocks == 2 )
@ -207,7 +224,10 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
int mismatchScore = mismatchQualitySum(aRead, reference, read.getAlignmentStart()-(int)leftmostIndex);
// we currently can not deal with clipped reads correctly
if ( readIsClipped(read) ) { refReads.add(read); continue; }
if ( readIsClipped(read) ) {
refReads.add(read);
continue;
}
// if this doesn't match perfectly to the reference, let's try to clean it
if ( mismatchScore > 0 ) {
@ -410,13 +430,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
private Pair<Integer, Integer> findBestOffset(String ref, AlignedRead read) {
int attempts = ref.length() - read.getReadLength() + 1;
int bestScore = mismatchQualitySum(read, ref, 0);
int bestScore = mismatchQualitySumIgnoreCigar(read, ref, 0);
int bestIndex = 0;
for ( int i = 1; i < attempts; i++ ) {
// we can't get better than 0!
if ( bestScore == 0 )
return new Pair<Integer, Integer>(bestIndex, 0);
int score = mismatchQualitySum(read, ref, i);
int score = mismatchQualitySumIgnoreCigar(read, ref, i);
if ( score < bestScore ) {
bestScore = score;
bestIndex = i;
@ -584,8 +604,9 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
* @param cigar structure of the original alignment
* @param refSeq reference sequence the read is aligned to
* @param readSeq read sequence
* @param refIndex 0-based alignment start position
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
* @param refIndex 0-based alignment start position on ref
* @param readIndex 0-based alignment start position on read
* @return a cigar, in which indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
*/
private Cigar indelRealignment(Cigar cigar, String refSeq, String readSeq, int refIndex, int readIndex) {
if ( cigar.numCigarElements() < 2 ) return cigar; // no indels, nothing to do