-check original alignments for indels when computing mismatch score
-move logging to debug git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@778 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5f67914b08
commit
0d58e4ccc9
|
|
@ -106,7 +106,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int numMismatches(SAMRecord r, String refSeq, int refIdx) {
|
private static int numMismatches(SAMRecord r, String refSeq, int refIndex) {
|
||||||
int readIdx = 0;
|
int readIdx = 0;
|
||||||
int mismatches = 0;
|
int mismatches = 0;
|
||||||
String readSeq = r.getReadString();
|
String readSeq = r.getReadString();
|
||||||
|
|
@ -115,8 +115,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
CigarElement ce = c.getCigarElement(i);
|
CigarElement ce = c.getCigarElement(i);
|
||||||
switch ( ce.getOperator() ) {
|
switch ( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
for (int j = 0 ; j < ce.getLength() ; j++, refIdx++, readIdx++ ) {
|
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
|
||||||
if ( Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIdx)) )
|
if ( Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
|
||||||
mismatches++;
|
mismatches++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -124,7 +124,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
readIdx += ce.getLength();
|
readIdx += ce.getLength();
|
||||||
break;
|
break;
|
||||||
case D:
|
case D:
|
||||||
refIdx += ce.getLength();
|
refIndex += ce.getLength();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -132,17 +132,32 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
return mismatches;
|
return mismatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int mismatchQualitySum(AlignedRead aRead, String ref, int refIndex) {
|
private static int mismatchQualitySum(AlignedRead aRead, String refSeq, int refIndex) {
|
||||||
String read = aRead.getReadString();
|
String readSeq = aRead.getReadString();
|
||||||
String quals = aRead.getBaseQualityString();
|
String quals = aRead.getBaseQualityString();
|
||||||
|
int readIndex = 0;
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
for ( int readIndex = 0 ; readIndex < read.length() ; readIndex++, refIndex++ ) {
|
Cigar c = aRead.getCigar();
|
||||||
if ( refIndex >= ref.length() )
|
for (int i = 0 ; i < c.numCigarElements() ; i++) {
|
||||||
|
CigarElement ce = c.getCigarElement(i);
|
||||||
|
switch ( ce.getOperator() ) {
|
||||||
|
case M:
|
||||||
|
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIndex++ ) {
|
||||||
|
if ( refIndex >= refSeq.length() )
|
||||||
sum += MAX_QUAL;
|
sum += MAX_QUAL;
|
||||||
else if ( Character.toUpperCase(read.charAt(readIndex)) != Character.toUpperCase(ref.charAt(refIndex)) )
|
else if ( Character.toUpperCase(readSeq.charAt(readIndex)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
|
||||||
sum += (int)quals.charAt(readIndex) - 33;
|
sum += (int)quals.charAt(readIndex) - 33;
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
case I:
|
||||||
|
readIndex += ce.getLength();
|
||||||
|
break;
|
||||||
|
case D:
|
||||||
|
refIndex += ce.getLength();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -188,7 +203,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
sb.append(reference.substring(0, refIdx));
|
sb.append(reference.substring(0, refIdx));
|
||||||
Cigar c = swConsensus.getCigar();
|
Cigar c = swConsensus.getCigar();
|
||||||
logger.info("CIGAR = " + cigarToString(c));
|
logger.debug("CIGAR = " + cigarToString(c));
|
||||||
|
|
||||||
int indelCount = 0;
|
int indelCount = 0;
|
||||||
int altIdx = 0;
|
int altIdx = 0;
|
||||||
|
|
@ -236,23 +251,23 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
else
|
else
|
||||||
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.getFirst()));
|
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.getFirst()));
|
||||||
|
|
||||||
logger.info(aRead.getReadString() + " vs. " + toTest.getReadString() + " => " + myScore + " - " + altAlignment.getFirst());
|
logger.debug(aRead.getReadString() + " vs. " + toTest.getReadString() + " => " + myScore + " - " + altAlignment.getFirst());
|
||||||
consensus.mismatchSum += myScore;
|
consensus.mismatchSum += myScore;
|
||||||
if ( myScore == 0 )
|
if ( myScore == 0 )
|
||||||
// we already know that this is its consensus, so don't bother testing it later
|
// we already know that this is its consensus, so don't bother testing it later
|
||||||
altAlignmentsToTest.set(j, false);
|
altAlignmentsToTest.set(j, false);
|
||||||
}
|
}
|
||||||
logger.info(aRead.getReadString() + " " + consensus.mismatchSum);
|
logger.debug(aRead.getReadString() + " " + consensus.mismatchSum);
|
||||||
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
||||||
bestConsensus = consensus;
|
bestConsensus = consensus;
|
||||||
logger.info(aRead.getReadString() + " " + consensus.mismatchSum);
|
logger.debug(aRead.getReadString() + " " + consensus.mismatchSum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if the best alternate consensus has a smaller sum of quality score mismatches (more than the LOD threshold), then clean!
|
// if the best alternate consensus has a smaller sum of quality score mismatches (more than the LOD threshold), then clean!
|
||||||
if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
|
if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
|
||||||
logger.info("CLEAN: " + bestConsensus.str );
|
logger.debug("CLEAN: " + bestConsensus.str );
|
||||||
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
||||||
StringBuffer str = new StringBuffer();
|
StringBuffer str = new StringBuffer();
|
||||||
str.append(reads.get(0).getReferenceName());
|
str.append(reads.get(0).getReferenceName());
|
||||||
|
|
@ -263,6 +278,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
str.append("\t" + (((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0) + "\n");
|
str.append("\t" + (((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0) + "\n");
|
||||||
try {
|
try {
|
||||||
indelOutput.write(str.toString());
|
indelOutput.write(str.toString());
|
||||||
|
indelOutput.flush();
|
||||||
} catch (Exception e) {}
|
} catch (Exception e) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -615,7 +631,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
||||||
bestConsensus = consensus;
|
bestConsensus = consensus;
|
||||||
logger.info(altConsensus + " " + consensus.mismatchSum);
|
logger.debug(altConsensus + " " + consensus.mismatchSum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -623,7 +639,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
|
|
||||||
// if the best alternate consensus has a smaller sum of quality score mismatches, then clean!
|
// if the best alternate consensus has a smaller sum of quality score mismatches, then clean!
|
||||||
if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
|
if ( bestConsensus != null && ((double)(totalMismatchSum - bestConsensus.mismatchSum))/10.0 >= LOD_THRESHOLD ) {
|
||||||
logger.info("CLEAN: " + bestConsensus.str);
|
logger.debug("CLEAN: " + bestConsensus.str);
|
||||||
|
|
||||||
// clean the appropriate reads
|
// clean the appropriate reads
|
||||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue