Multiple instances of the same consensus were all living happily together in the set of alt consensuses. As the result, we have been taking considerable performance hit from trying to align all reads to those instances over and over again. Fixed. Only one copy of any given alt consensus is now stored.
in class Consensus: 1) use Arrays.equals() to compare java arrays!! 2) if object overrides equals() it also MUST provide appropriate hashCode() (thanks, Matt) As a side effect, a number of commented out debug prints are committed, still need them... git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2879 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1f673e9fab
commit
94d74d4f78
|
|
@ -108,6 +108,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private FileWriter statsOutput = null;
|
||||
private FileWriter snpsOutput = null;
|
||||
|
||||
// private boolean DEBUG = true;
|
||||
|
||||
public void initialize() {
|
||||
|
||||
if ( LOD_THRESHOLD < 0.0 )
|
||||
|
|
@ -409,11 +411,17 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
final AlignedRead aRead = new AlignedRead(read);
|
||||
|
||||
// if ( DEBUG ) System.out.println("\nREAD "+read.getReadName());
|
||||
|
||||
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read);
|
||||
if ( numBlocks == 2 ) {
|
||||
// if ( DEBUG ) System.out.println("Attempting to left-shift: read "+read.getReadName()+" alignment: "+read.getCigar().toString()+
|
||||
// " at "+read.getAlignmentStart()+"; ref chunk starts at "+leftmostIndex);
|
||||
|
||||
Cigar newCigar = indelRealignment(read.getCigar(), reference, read.getReadBases(), read.getAlignmentStart()-(int)leftmostIndex, 0);
|
||||
if ( aRead.setCigar(newCigar) ) {
|
||||
// if ( DEBUG ) System.out.println("CIGAR CHANGED to "+newCigar.toString()) ;
|
||||
leftMovedIndels.add(aRead);
|
||||
}
|
||||
}
|
||||
|
|
@ -431,7 +439,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if ( numBlocks == 2 ) {
|
||||
Consensus c = createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getRead().getReadBases());
|
||||
if ( c == null ) {} //System.out.println("ERROR: Failed to create alt consensus for read "+aRead.getRead().getReadName());
|
||||
else altConsenses.add(c);
|
||||
else {
|
||||
// if ( DEBUG ) System.out.println("Adding consensus: read start "+aRead.getAlignmentStart()+" ref offset "+leftmostIndex+" read cigar "+aRead.getCigar());
|
||||
altConsenses.add(c);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// if ( debugOn ) System.out.println("Going to test...");
|
||||
|
|
@ -453,6 +464,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
||||
if ( c != null ) {
|
||||
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
||||
// if ( DEBUG ) System.out.println("Found SW consensus: ref starts at "+leftmostIndex+" alignment at "+swConsensus.getAlignmentStart2wrt1()+" cigar "+swConsensus.getCigar().toString());
|
||||
altConsenses.add(c);
|
||||
} else {
|
||||
// if ( debugOn ) System.out.println("FAILED to create Alt consensus from SW");
|
||||
|
|
@ -480,6 +492,16 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
while ( iter.hasNext() ) {
|
||||
Consensus consensus = iter.next();
|
||||
|
||||
// if ( DEBUG ) {
|
||||
// System.out.println("Checking consensus with alignment at "+consensus.positionOnReference+" cigar "+consensus.cigar);
|
||||
// System.out.println(new String(consensus.str));
|
||||
// int z = 0;
|
||||
// for ( ; z < consensus.positionOnReference; z++ ) System.out.print('.');
|
||||
// for ( z=0 ; z < consensus.cigar.getCigarElement(0).getLength() ; z++ ) System.out.print('.');
|
||||
// if ( consensus.cigar.getCigarElement(1).getOperator() == CigarOperator.I ) for ( z= 0; z < consensus.cigar.getCigarElement(1).getLength(); z++ ) System.out.print('I');
|
||||
// System.out.println();
|
||||
// }
|
||||
|
||||
// if ( debugOn ) System.out.println("Consensus: "+consensus.str);
|
||||
|
||||
for ( int j = 0; j < altReads.size(); j++ ) {
|
||||
|
|
@ -488,6 +510,17 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
// the mismatch score is the min of its alignment vs. the reference and vs. the alternate
|
||||
int myScore = altAlignment.second;
|
||||
|
||||
// if ( DEBUG ) {
|
||||
// if ( toTest.getRead().getReadName().equals("302U3AAXX090430:1:35:429:1940#0") ) {
|
||||
// System.out.println("READ: new score="+myScore+"; old score="+toTest.getMismatchScoreToReference() );
|
||||
// }
|
||||
// if ( toTest.getRead().getReadName().equals("426RRAAXX090524:8:42:1730:53#0") ) {
|
||||
// System.out.println("READ: new score="+myScore+"; old score="+toTest.getMismatchScoreToReference() +" start at "+toTest.getAlignmentStart());
|
||||
// }
|
||||
// System.out.println(toTest.getRead().getReadName()+" old score="+toTest.getMismatchScoreToReference()+" new score="+myScore );
|
||||
//
|
||||
// }
|
||||
if ( myScore >= toTest.getMismatchScoreToReference() )
|
||||
myScore = toTest.getMismatchScoreToReference();
|
||||
// keep track of reads that align better to the alternate consensus.
|
||||
|
|
@ -505,12 +538,15 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
break;
|
||||
}
|
||||
|
||||
// if ( DEBUG ) System.out.println("Consensus score: "+consensus.mismatchSum);
|
||||
|
||||
//logger.debug(consensus.str + " " + consensus.mismatchSum);
|
||||
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
||||
// we do not need this alt consensus, release memory right away!!
|
||||
if ( bestConsensus != null )
|
||||
bestConsensus.readIndexes.clear();
|
||||
bestConsensus = consensus;
|
||||
// if ( DEBUG ) System.out.println("Current consensus is better!");
|
||||
//logger.debug(consensus.str + " " + consensus.mismatchSum);
|
||||
} else {
|
||||
// we do not need this alt consensus, release memory right away!!
|
||||
|
|
@ -957,6 +993,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
int indelIndexOnRead = readIndex+ce1.getLength(); // position of the indel on the READ (first insterted base, of first base after deletion)
|
||||
|
||||
byte[] indelString = new byte[ce2.getLength()]; // inserted or deleted sequence
|
||||
|
||||
if ( ce2.getOperator() == CigarOperator.D )
|
||||
System.arraycopy(refSeq, indelIndexOnRef, indelString, 0, ce2.getLength());
|
||||
else if ( ce2.getOperator() == CigarOperator.I )
|
||||
|
|
@ -966,6 +1003,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// for now, we'll just punt the issue and not try to realign these
|
||||
return cigar;
|
||||
|
||||
// if ( DEBUG ) System.out.println("Indel sequence: "+new String(indelString));
|
||||
|
||||
// now we have to check all WHOLE periods of the indel sequence:
|
||||
// for instance, if
|
||||
// REF: AGCTATATATAGCC
|
||||
|
|
@ -985,11 +1024,13 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// position, we will move insertion left, to the position right after CA. This way, while moving the indel across the repeat
|
||||
// on the ref, we can theoretically move it across a non-repeat on the read if the latter has a mismtach.
|
||||
|
||||
// if ( DEBUG ) System.out.println("Starting with "+indelIndexOnRef+" on the ref");
|
||||
while ( period < indel_length ) { // we will always get at least trivial period = indelStringLength
|
||||
|
||||
period = BaseUtils.sequencePeriod(indelString, period+1);
|
||||
|
||||
if ( indel_length % period != 0 ) continue; // if indel sequence length is not a multiple of the period, it's not gonna work
|
||||
// if ( DEBUG ) System.out.println("Checking period of "+period);
|
||||
|
||||
int newIndex = indelIndexOnRef;
|
||||
|
||||
|
|
@ -1007,9 +1048,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
break;
|
||||
}
|
||||
}
|
||||
if ( match )
|
||||
if ( match ) {
|
||||
// if ( DEBUG ) System.out.println("Success! Moving left by "+period);
|
||||
newIndex -= period; // yes, they are the same, we can move indel farther left by at least period bases, go check if we can do more...
|
||||
else break; // oops, no match, can not push indel farther left
|
||||
}
|
||||
else {
|
||||
// if ( DEBUG ) System.out.println("Can not push further left with period "+period+"; adjusted position: "+newIndex);
|
||||
break; // oops, no match, can not push indel farther left
|
||||
}
|
||||
}
|
||||
|
||||
final int newDifference = indelIndexOnRef - newIndex;
|
||||
|
|
@ -1025,6 +1071,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
if ( difference > 0 ) {
|
||||
|
||||
// if ( DEBUG ) System.out.println("Best offset found: "+difference);
|
||||
|
||||
// The following if() statement: this should've never happened, unless the alignment is really screwed up.
|
||||
// A real life example:
|
||||
//
|
||||
|
|
@ -1181,12 +1229,18 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
readIndexes = new ArrayList<Pair<Integer, Integer>>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
return ( this == o || (o instanceof Consensus && this.str.equals(((Consensus)o).str)) );
|
||||
return ( this == o || (o instanceof Consensus && Arrays.equals(this.str,(((Consensus)o).str)) ) );
|
||||
}
|
||||
|
||||
public boolean equals(Consensus c) {
|
||||
return ( this == c || this.str.equals(c.str) );
|
||||
return ( this == c || Arrays.equals(this.str,c.str) ) ;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Arrays.hashCode(this.str);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue