Be smart about trying alternate consensuses:

try indels from the prior alignments first, and test only one instance of each


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@971 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-06-10 17:43:22 +00:00
parent f304803811
commit f9be175f44
1 changed files with 22 additions and 8 deletions

View File

@ -196,7 +196,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
return sum; return sum;
} }
private boolean readIsClipped(SAMRecord read) { private static boolean readIsClipped(SAMRecord read) {
final Cigar c = read.getCigar(); final Cigar c = read.getCigar();
final int n = c.numCigarElements(); final int n = c.numCigarElements();
if ( c.getCigarElement(n-1).getOperator() == CigarOperator.S || if ( c.getCigarElement(n-1).getOperator() == CigarOperator.S ||
@ -204,12 +204,26 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
return false; return false;
} }
/**
 * Builds a string key describing the indel carried by a read, so that reads
 * sharing the same indel can be recognised as duplicates.
 *
 * The key is the concatenation of:
 *   1. the alignment start plus the length of the first CIGAR element, minus one
 *      (presumably the last reference position before the indel; this assumes
 *      the first element is an aligned block -- TODO confirm against callers),
 *   2. "D" when the second CIGAR element is a deletion, otherwise "I",
 *   3. the length of the second CIGAR element.
 *
 * Note that the key does not include the inserted bases, so two different
 * insertions of equal length at the same position yield the same key. Any
 * second element that is not a deletion is labelled "I".
 *
 * @param read the read to describe; its CIGAR must contain at least two
 *             elements, since elements 0 and 1 are accessed unconditionally
 * @return the key, e.g. "1004D3" for a 3-base deletion whose computed start is 1004
 */
private static String hashIndel(AlignedRead read) {
    final Cigar cigar = read.getCigar();
    final int indelStart = read.getAlignmentStart() + cigar.getCigarElement(0).getLength() - 1;

    // The indel is taken to be the second CIGAR element.
    final CigarElement indel = cigar.getCigarElement(1);
    final String indelType = (indel.getOperator() == CigarOperator.D) ? "D" : "I";

    // StringBuilder rather than StringBuffer: the buffer never leaves this
    // method, so synchronised appends buy nothing.
    return new StringBuilder()
            .append(indelStart)
            .append(indelType)
            .append(indel.getLength())
            .toString();
}
private void clean(List<SAMRecord> reads, String reference, GenomeLoc interval) { private void clean(List<SAMRecord> reads, String reference, GenomeLoc interval) {
long leftmostIndex = interval.getStart(); long leftmostIndex = interval.getStart();
ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>(); ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>(); // reads that perfectly match ref
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); LinkedList<AlignedRead> altReads = new LinkedList<AlignedRead>(); // reads that don't perfectly match
ArrayList<Boolean> altAlignmentsToTest = new ArrayList<Boolean>(); LinkedList<Boolean> altAlignmentsToTest = new LinkedList<Boolean>(); // should we try to make an alt consensus from the corresponding read in altReads?
HashSet<String> priorIndelsToTest = new HashSet<String>(); // list of indels seen in the prior alignments to test (so we don't duplicate)
int totalMismatchSum = 0; int totalMismatchSum = 0;
// decide which reads potentially need to be cleaned // decide which reads potentially need to be cleaned
@ -236,11 +250,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
totalMismatchSum += mismatchScore; totalMismatchSum += mismatchScore;
aRead.setMismatchScoreToReference(mismatchScore); aRead.setMismatchScoreToReference(mismatchScore);
} }
// otherwise, if it has an indel, let's see if that's the best consensus // otherwise, if it has an indel, let's see if that's the best consensus (one instance per indel though)
else if ( numBlocks == 2 ) { else if ( numBlocks == 2 && priorIndelsToTest.add(hashIndel(aRead))) {
aRead.doNotRealign(); aRead.doNotRealign();
altReads.add(aRead); altReads.addFirst(aRead);
altAlignmentsToTest.add(true); altAlignmentsToTest.addFirst(true);
} }
// otherwise, we can emit it as is // otherwise, we can emit it as is
else { else {