Be smart about trying alternate consensuses:
try prior indels first, and only one instance of each. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@971 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f304803811
commit
f9be175f44
|
|
@ -196,7 +196,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean readIsClipped(SAMRecord read) {
|
private static boolean readIsClipped(SAMRecord read) {
|
||||||
final Cigar c = read.getCigar();
|
final Cigar c = read.getCigar();
|
||||||
final int n = c.numCigarElements();
|
final int n = c.numCigarElements();
|
||||||
if ( c.getCigarElement(n-1).getOperator() == CigarOperator.S ||
|
if ( c.getCigarElement(n-1).getOperator() == CigarOperator.S ||
|
||||||
|
|
@ -204,12 +204,26 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a string key describing the indel carried by a read, in the form
 * {@code <start><D|I><length>} (for example {@code "1234D3"}).
 *
 * The caller in clean() adds this key to the priorIndelsToTest set, so only the
 * first read carrying a given indel is queued as an alternate consensus.
 *
 * NOTE(review): reads cigar elements 0 and 1 without a bounds check, so this
 * assumes the cigar has at least two elements with the indel as the second one.
 * The visible caller only invokes it when numBlocks == 2; confirm that this
 * guarantees the expected cigar layout.
 *
 * @param read the aligned read whose cigar is inspected
 * @return the key made of start position, indel type letter and indel length
 */
private static String hashIndel(AlignedRead read) {
|
||||||
|
final Cigar c = read.getCigar();
|
||||||
|
// alignment start plus the length of the first cigar element, minus one
final int start = read.getAlignmentStart() + c.getCigarElement(0).getLength() - 1;
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append(start);
|
||||||
|
// a second element with operator D is tagged "D"; any other operator is tagged "I"
if ( c.getCigarElement(1).getOperator() == CigarOperator.D )
|
||||||
|
sb.append("D");
|
||||||
|
else
|
||||||
|
sb.append("I");
|
||||||
|
// length of the second cigar element completes the key
sb.append(c.getCigarElement(1).getLength());
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
private void clean(List<SAMRecord> reads, String reference, GenomeLoc interval) {
|
private void clean(List<SAMRecord> reads, String reference, GenomeLoc interval) {
|
||||||
|
|
||||||
long leftmostIndex = interval.getStart();
|
long leftmostIndex = interval.getStart();
|
||||||
ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>();
|
ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>(); // reads that perfectly match ref
|
||||||
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>();
|
LinkedList<AlignedRead> altReads = new LinkedList<AlignedRead>(); // reads that don't perfectly match
|
||||||
ArrayList<Boolean> altAlignmentsToTest = new ArrayList<Boolean>();
|
LinkedList<Boolean> altAlignmentsToTest = new LinkedList<Boolean>(); // should we try to make an alt consensus from the corresponding read in altReads?
|
||||||
|
HashSet<String> priorIndelsToTest = new HashSet<String>(); // list of indels seen in the prior alignments to test (so we don't duplicate)
|
||||||
int totalMismatchSum = 0;
|
int totalMismatchSum = 0;
|
||||||
|
|
||||||
// decide which reads potentially need to be cleaned
|
// decide which reads potentially need to be cleaned
|
||||||
|
|
@ -236,11 +250,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
totalMismatchSum += mismatchScore;
|
totalMismatchSum += mismatchScore;
|
||||||
aRead.setMismatchScoreToReference(mismatchScore);
|
aRead.setMismatchScoreToReference(mismatchScore);
|
||||||
}
|
}
|
||||||
// otherwise, if it has an indel, let's see if that's the best consensus
|
// otherwise, if it has an indel, let's see if that's the best consensus (one instance per indel though)
|
||||||
else if ( numBlocks == 2 ) {
|
else if ( numBlocks == 2 && priorIndelsToTest.add(hashIndel(aRead))) {
|
||||||
aRead.doNotRealign();
|
aRead.doNotRealign();
|
||||||
altReads.add(aRead);
|
altReads.addFirst(aRead);
|
||||||
altAlignmentsToTest.add(true);
|
altAlignmentsToTest.addFirst(true);
|
||||||
}
|
}
|
||||||
// otherwise, we can emit it as is
|
// otherwise, we can emit it as is
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue