Fixed a bug that accounted for a bunch of my remaining mis-cleaned indels.
Also, slightly optimized the cleaner by using readBases (instead of readString) and caching cigar element lengths. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2419 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b780ffb34a
commit
87e5a41964
|
|
@ -75,8 +75,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
readsToWrite = new TreeSet<ComparableSAMRecord>();
|
readsToWrite = new TreeSet<ComparableSAMRecord>();
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Writing into output BAM file");
|
|
||||||
logger.info("Temporary space used: "+System.getProperty("java.io.tmpdir"));
|
|
||||||
generator = new Random(RANDOM_SEED);
|
generator = new Random(RANDOM_SEED);
|
||||||
|
|
||||||
if ( OUT_INDELS != null ) {
|
if ( OUT_INDELS != null ) {
|
||||||
|
|
@ -190,20 +188,20 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
|
|
||||||
|
|
||||||
private static int mismatchQualitySumIgnoreCigar(AlignedRead aRead, String refSeq, int refIndex) {
|
private static int mismatchQualitySumIgnoreCigar(AlignedRead aRead, String refSeq, int refIndex) {
|
||||||
String readSeq = aRead.getReadString();
|
byte[] readSeq = aRead.getRead().getReadBases();
|
||||||
String quals = aRead.getBaseQualityString();
|
byte[] quals = aRead.getRead().getBaseQualities();
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
for (int readIndex = 0 ; readIndex < readSeq.length() ; refIndex++, readIndex++ ) {
|
for (int readIndex = 0 ; readIndex < readSeq.length ; refIndex++, readIndex++ ) {
|
||||||
if ( refIndex >= refSeq.length() )
|
if ( refIndex >= refSeq.length() )
|
||||||
sum += MAX_QUAL;
|
sum += MAX_QUAL;
|
||||||
else {
|
else {
|
||||||
char refChr = refSeq.charAt(refIndex);
|
char refChr = refSeq.charAt(refIndex);
|
||||||
char readChr = readSeq.charAt(readIndex);
|
char readChr = (char)readSeq[readIndex];
|
||||||
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||||
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||||
continue; // do not count Ns/Xs/etc ?
|
continue; // do not count Ns/Xs/etc ?
|
||||||
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||||
sum += (int)quals.charAt(readIndex) - 33;
|
sum += (int)quals[readIndex];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
|
|
@ -263,7 +261,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
aRead.setMismatchScoreToReference(mismatchScore);
|
aRead.setMismatchScoreToReference(mismatchScore);
|
||||||
// if it has an indel, let's see if that's the best consensus
|
// if it has an indel, let's see if that's the best consensus
|
||||||
if ( numBlocks == 2 )
|
if ( numBlocks == 2 )
|
||||||
altConsenses.add(createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getReadString()));
|
altConsenses.add(createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getRead().getReadBases()));
|
||||||
else {
|
else {
|
||||||
// if ( debugOn ) System.out.println("Going to test...");
|
// if ( debugOn ) System.out.println("Going to test...");
|
||||||
altAlignmentsToTest.add(aRead);
|
altAlignmentsToTest.add(aRead);
|
||||||
|
|
@ -280,8 +278,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
|
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
|
||||||
for ( AlignedRead aRead : altAlignmentsToTest ) {
|
for ( AlignedRead aRead : altAlignmentsToTest ) {
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getReadString());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
||||||
if ( c != null) {
|
if ( c != null) {
|
||||||
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
|
|
@ -296,8 +294,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
int index = generator.nextInt(altAlignmentsToTest.size());
|
int index = generator.nextInt(altAlignmentsToTest.size());
|
||||||
AlignedRead aRead = altAlignmentsToTest.remove(index);
|
AlignedRead aRead = altAlignmentsToTest.remove(index);
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadString(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getReadString());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
||||||
if ( c != null)
|
if ( c != null)
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
}
|
}
|
||||||
|
|
@ -326,14 +324,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
else
|
else
|
||||||
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.first));
|
consensus.readIndexes.add(new Pair<Integer, Integer>(j, altAlignment.first));
|
||||||
|
|
||||||
logger.debug(consensus.str + " vs. " + toTest.getReadString() + " => " + myScore + " - " + altAlignment.first);
|
//logger.debug(consensus.str + " vs. " + toTest.getRead().getReadString() + " => " + myScore + " - " + altAlignment.first);
|
||||||
consensus.mismatchSum += myScore;
|
consensus.mismatchSum += myScore;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug(consensus.str + " " + consensus.mismatchSum);
|
//logger.debug(consensus.str + " " + consensus.mismatchSum);
|
||||||
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
if ( bestConsensus == null || bestConsensus.mismatchSum > consensus.mismatchSum) {
|
||||||
bestConsensus = consensus;
|
bestConsensus = consensus;
|
||||||
logger.debug(consensus.str + " " + consensus.mismatchSum);
|
//logger.debug(consensus.str + " " + consensus.mismatchSum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -349,7 +347,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
AlignedRead aRead = altReads.get(indexPair.first);
|
AlignedRead aRead = altReads.get(indexPair.first);
|
||||||
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, (int)leftmostIndex);
|
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, (int)leftmostIndex);
|
||||||
}
|
}
|
||||||
if( !alternateReducesEntropy(altReads, reference, leftmostIndex) ) {
|
if ( !alternateReducesEntropy(altReads, reference, leftmostIndex) ) {
|
||||||
if ( statsOutput != null ) {
|
if ( statsOutput != null ) {
|
||||||
try {
|
try {
|
||||||
statsOutput.write(interval.toString());
|
statsOutput.write(interval.toString());
|
||||||
|
|
@ -360,11 +358,11 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
} catch (Exception e) {}
|
} catch (Exception e) {}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.debug("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
|
//logger.debug("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
|
||||||
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
||||||
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
|
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
|
||||||
// indel calls (tab-delimited): chr position size type sequence
|
// indel calls (tab-delimited): chr position size type sequence
|
||||||
StringBuffer str = new StringBuffer();
|
StringBuilder str = new StringBuilder();
|
||||||
str.append(reads.get(0).getReferenceName());
|
str.append(reads.get(0).getReferenceName());
|
||||||
int position = bestConsensus.positionOnReference + bestConsensus.cigar.getCigarElement(0).getLength();
|
int position = bestConsensus.positionOnReference + bestConsensus.cigar.getCigarElement(0).getLength();
|
||||||
str.append("\t" + (leftmostIndex + position - 1));
|
str.append("\t" + (leftmostIndex + position - 1));
|
||||||
|
|
@ -436,14 +434,14 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Consensus createAlternateConsensus(int indexOnRef, Cigar c, String reference, String readStr) {
|
private Consensus createAlternateConsensus(int indexOnRef, Cigar c, String reference, byte[] readStr) {
|
||||||
if ( indexOnRef < 0 )
|
if ( indexOnRef < 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// create the new consensus
|
// create the new consensus
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuilder sb = new StringBuilder();
|
||||||
sb.append(reference.substring(0, indexOnRef));
|
sb.append(reference.substring(0, indexOnRef));
|
||||||
logger.debug("CIGAR = " + AlignmentUtils.cigarToString(c));
|
//logger.debug("CIGAR = " + AlignmentUtils.cigarToString(c));
|
||||||
|
|
||||||
int indelCount = 0;
|
int indelCount = 0;
|
||||||
int altIdx = 0;
|
int altIdx = 0;
|
||||||
|
|
@ -451,22 +449,24 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
boolean ok_flag = true;
|
boolean ok_flag = true;
|
||||||
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
|
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
|
||||||
CigarElement ce = c.getCigarElement(i);
|
CigarElement ce = c.getCigarElement(i);
|
||||||
|
int elementLength = ce.getLength();
|
||||||
switch( ce.getOperator() ) {
|
switch( ce.getOperator() ) {
|
||||||
case D:
|
case D:
|
||||||
indelCount++;
|
indelCount++;
|
||||||
refIdx += ce.getLength();
|
refIdx += elementLength;
|
||||||
break;
|
break;
|
||||||
case M:
|
case M:
|
||||||
if ( reference.length() < refIdx+ce.getLength() )
|
if ( reference.length() < refIdx + elementLength )
|
||||||
ok_flag = false;
|
ok_flag = false;
|
||||||
else
|
else
|
||||||
sb.append(reference.substring(refIdx, refIdx+ce.getLength()));
|
sb.append(reference.substring(refIdx, refIdx + elementLength));
|
||||||
refIdx += ce.getLength();
|
refIdx += elementLength;
|
||||||
altIdx += ce.getLength();
|
altIdx += elementLength;
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
sb.append(readStr.substring(altIdx, altIdx+ce.getLength()));
|
for (int j = 0; j < elementLength; j++)
|
||||||
altIdx += ce.getLength();
|
sb.append((char)readStr[altIdx + j]);
|
||||||
|
altIdx += elementLength;
|
||||||
indelCount++;
|
indelCount++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -582,9 +582,12 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
private boolean alternateReducesEntropy(List<AlignedRead> reads, String reference, long leftmostIndex) {
|
private boolean alternateReducesEntropy(List<AlignedRead> reads, String reference, long leftmostIndex) {
|
||||||
int[] originalMismatchBases = new int[reference.length()];
|
int[] originalMismatchBases = new int[reference.length()];
|
||||||
int[] cleanedMismatchBases = new int[reference.length()];
|
int[] cleanedMismatchBases = new int[reference.length()];
|
||||||
int[] totalBases = new int[reference.length()];
|
int[] totalOriginalBases = new int[reference.length()];
|
||||||
|
int[] totalCleanedBases = new int[reference.length()];
|
||||||
|
|
||||||
|
// set to 1 to prevent dividing by zero
|
||||||
for ( int i=0; i < reference.length(); i++ )
|
for ( int i=0; i < reference.length(); i++ )
|
||||||
originalMismatchBases[i] = totalBases[i] = 0;
|
originalMismatchBases[i] = totalOriginalBases[i] = cleanedMismatchBases[i] = totalCleanedBases[i] = 1;
|
||||||
|
|
||||||
for (int i=0; i < reads.size(); i++) {
|
for (int i=0; i < reads.size(); i++) {
|
||||||
AlignedRead read = reads.get(i);
|
AlignedRead read = reads.get(i);
|
||||||
|
|
@ -592,18 +595,18 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
int refIdx = read.getOriginalAlignmentStart() - (int)leftmostIndex;
|
int refIdx = read.getOriginalAlignmentStart() - (int)leftmostIndex;
|
||||||
String readStr = read.getReadString();
|
byte[] readStr = read.getRead().getReadBases();
|
||||||
String qualStr = read.getBaseQualityString();
|
byte[] quals = read.getRead().getBaseQualities();
|
||||||
|
|
||||||
for (int j=0; j < readStr.length(); j++, refIdx++ ) {
|
for (int j=0; j < readStr.length; j++, refIdx++ ) {
|
||||||
if ( refIdx < 0 || refIdx >= reference.length() ) {
|
if ( refIdx < 0 || refIdx >= reference.length() ) {
|
||||||
//System.out.println( "Read: "+read.getRead().getReadName() + "; length = " + readStr.length() );
|
//System.out.println( "Read: "+read.getRead().getReadName() + "; length = " + readStr.length() );
|
||||||
//System.out.println( "Ref left: "+ leftmostIndex +"; ref length=" + reference.length() + "; read alignment start: "+read.getOriginalAlignmentStart() );
|
//System.out.println( "Ref left: "+ leftmostIndex +"; ref length=" + reference.length() + "; read alignment start: "+read.getOriginalAlignmentStart() );
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
totalBases[refIdx] += (int)qualStr.charAt(j) - 33;
|
totalOriginalBases[refIdx] += quals[j];
|
||||||
if ( Character.toUpperCase(readStr.charAt(j)) != Character.toUpperCase(reference.charAt(refIdx)) )
|
if ( Character.toUpperCase((char)readStr[j]) != Character.toUpperCase(reference.charAt(refIdx)) )
|
||||||
originalMismatchBases[refIdx] += (int)qualStr.charAt(j) - 33;
|
originalMismatchBases[refIdx] += quals[j];
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset and now do the calculation based on the cleaning
|
// reset and now do the calculation based on the cleaning
|
||||||
|
|
@ -612,18 +615,22 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
Cigar c = read.getCigar();
|
Cigar c = read.getCigar();
|
||||||
for (int j = 0 ; j < c.numCigarElements() ; j++) {
|
for (int j = 0 ; j < c.numCigarElements() ; j++) {
|
||||||
CigarElement ce = c.getCigarElement(j);
|
CigarElement ce = c.getCigarElement(j);
|
||||||
|
int elementLength = ce.getLength();
|
||||||
switch ( ce.getOperator() ) {
|
switch ( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
for (int k = 0 ; k < ce.getLength() ; k++, refIdx++, altIdx++ ) {
|
for (int k = 0 ; k < elementLength ; k++, refIdx++, altIdx++ ) {
|
||||||
if ( refIdx < reference.length() && Character.toUpperCase(readStr.charAt(altIdx)) != Character.toUpperCase(reference.charAt(refIdx)) )
|
if ( refIdx >= reference.length() )
|
||||||
cleanedMismatchBases[refIdx] += (int)qualStr.charAt(altIdx) - 33;
|
break;
|
||||||
|
totalCleanedBases[refIdx] += quals[altIdx];
|
||||||
|
if ( Character.toUpperCase((char)readStr[altIdx]) != Character.toUpperCase(reference.charAt(refIdx)) )
|
||||||
|
cleanedMismatchBases[refIdx] += quals[altIdx];
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
altIdx += ce.getLength();
|
altIdx += elementLength;
|
||||||
break;
|
break;
|
||||||
case D:
|
case D:
|
||||||
refIdx += ce.getLength();
|
refIdx += elementLength;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -631,19 +638,19 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
|
|
||||||
int originalMismatchColumns = 0, cleanedMismatchColumns = 0;
|
int originalMismatchColumns = 0, cleanedMismatchColumns = 0;
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuilder sb = new StringBuilder();
|
||||||
for ( int i=0; i < reference.length(); i++ ) {
|
for ( int i=0; i < reference.length(); i++ ) {
|
||||||
if ( cleanedMismatchBases[i] == originalMismatchBases[i] )
|
if ( cleanedMismatchBases[i] == originalMismatchBases[i] )
|
||||||
continue;
|
continue;
|
||||||
boolean didMismatch = false, stillMismatches = false;
|
boolean didMismatch = false, stillMismatches = false;
|
||||||
if ( originalMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) {
|
if ( originalMismatchBases[i] > totalOriginalBases[i] * MISMATCH_THRESHOLD ) {
|
||||||
didMismatch = true;
|
didMismatch = true;
|
||||||
originalMismatchColumns++;
|
originalMismatchColumns++;
|
||||||
if ( cleanedMismatchBases[i] > originalMismatchBases[i] * (1.0 - MISMATCH_COLUMN_CLEANED_FRACTION) ) {
|
if ( (cleanedMismatchBases[i] / totalCleanedBases[i]) > (originalMismatchBases[i] / totalOriginalBases[i]) * (1.0 - MISMATCH_COLUMN_CLEANED_FRACTION) ) {
|
||||||
stillMismatches = true;
|
stillMismatches = true;
|
||||||
cleanedMismatchColumns++;
|
cleanedMismatchColumns++;
|
||||||
}
|
}
|
||||||
} else if ( cleanedMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) {
|
} else if ( cleanedMismatchBases[i] > totalCleanedBases[i] * MISMATCH_THRESHOLD ) {
|
||||||
cleanedMismatchColumns++;
|
cleanedMismatchColumns++;
|
||||||
}
|
}
|
||||||
if ( snpsOutput != null ) {
|
if ( snpsOutput != null ) {
|
||||||
|
|
@ -658,7 +665,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug("Original mismatch columns = " + originalMismatchColumns + "; cleaned mismatch columns = " + cleanedMismatchColumns);
|
//logger.debug("Original mismatch columns = " + originalMismatchColumns + "; cleaned mismatch columns = " + cleanedMismatchColumns);
|
||||||
|
|
||||||
boolean reduces = (originalMismatchColumns == 0 || cleanedMismatchColumns < originalMismatchColumns);
|
boolean reduces = (originalMismatchColumns == 0 || cleanedMismatchColumns < originalMismatchColumns);
|
||||||
if ( reduces && snpsOutput != null ) {
|
if ( reduces && snpsOutput != null ) {
|
||||||
|
|
@ -774,11 +781,8 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
newCigar.add(new CigarElement(ce1.getLength()-difference, CigarOperator.M));
|
newCigar.add(new CigarElement(ce1.getLength()-difference, CigarOperator.M));
|
||||||
newCigar.add(ce2);
|
newCigar.add(ce2);
|
||||||
newCigar.add(new CigarElement(cigar.getCigarElement(2).getLength()+difference, CigarOperator.M));
|
newCigar.add(new CigarElement(cigar.getCigarElement(2).getLength()+difference, CigarOperator.M));
|
||||||
// System.out.println(" FROM:\n"+AlignmentUtils.alignmentToString(cigar,readSeq,refSeq,refIndex));
|
|
||||||
// if ( ce2.getLength() >=2 )
|
|
||||||
// System.out.println(" REALIGNED TO:\n"+AlignmentUtils.alignmentToString(newCigar,readSeq,refSeq,refIndex,readIndex)+"\n");
|
|
||||||
|
|
||||||
logger.debug("Realigning indel: " + AlignmentUtils.cigarToString(cigar) + " to " + AlignmentUtils.cigarToString(newCigar));
|
//logger.debug("Realigning indel: " + AlignmentUtils.cigarToString(cigar) + " to " + AlignmentUtils.cigarToString(newCigar));
|
||||||
cigar = newCigar;
|
cigar = newCigar;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -801,10 +805,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getReadString() {
|
|
||||||
return read.getReadString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getReadLength() {
|
public int getReadLength() {
|
||||||
return read.getReadLength();
|
return read.getReadLength();
|
||||||
}
|
}
|
||||||
|
|
@ -875,10 +875,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
return updated;
|
return updated;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getBaseQualityString() {
|
|
||||||
return read.getBaseQualityString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setMismatchScoreToReference(int score) {
|
public void setMismatchScoreToReference(int score) {
|
||||||
mismatchScoreToReference = score;
|
mismatchScoreToReference = score;
|
||||||
}
|
}
|
||||||
|
|
@ -911,110 +907,4 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
return ( this == c || this.str.equals(c.str) );
|
return ( this == c || this.str.equals(c.str) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testCleanWithInsertion() {
|
|
||||||
String reference = "AAAAAACCCCCCAAAAAA";
|
|
||||||
// the alternate reference is: "AAAAAACCCTTCCCAAAAAA";
|
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
SAMFileHeader header = getToolkit().getSAMFileHeader();
|
|
||||||
SAMRecord r1 = new SAMRecord(header);
|
|
||||||
r1.setReadName("1");
|
|
||||||
r1.setReadString("AACCCCCC");
|
|
||||||
r1.setAlignmentStart(4);
|
|
||||||
r1.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r2 = new SAMRecord(header);
|
|
||||||
r2.setReadName("2");
|
|
||||||
r2.setReadString("AAAACCCT");
|
|
||||||
r2.setAlignmentStart(2);
|
|
||||||
r2.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r3 = new SAMRecord(header);
|
|
||||||
r3.setReadName("3");
|
|
||||||
r3.setReadString("CTTC");
|
|
||||||
r3.setAlignmentStart(10);
|
|
||||||
r3.setBaseQualityString("BBBB");
|
|
||||||
SAMRecord r4 = new SAMRecord(header);
|
|
||||||
r4.setReadName("4");
|
|
||||||
r4.setReadString("TCCCAA");
|
|
||||||
r4.setAlignmentStart(8);
|
|
||||||
r4.setBaseQualityString("BBBBBB");
|
|
||||||
SAMRecord r5 = new SAMRecord(header);
|
|
||||||
r5.setReadName("5");
|
|
||||||
r5.setReadString("AAAGAACC");
|
|
||||||
r5.setAlignmentStart(0);
|
|
||||||
r5.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r6 = new SAMRecord(header);
|
|
||||||
r6.setReadName("6");
|
|
||||||
r6.setReadString("CCAAAGAA");
|
|
||||||
r6.setAlignmentStart(10);
|
|
||||||
r6.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r7 = new SAMRecord(header);
|
|
||||||
r7.setReadName("7");
|
|
||||||
r7.setReadString("AACCCTTCCC");
|
|
||||||
r7.setAlignmentStart(4);
|
|
||||||
r7.setBaseQualityString("BBBBBBBBBB");
|
|
||||||
reads.add(r1);
|
|
||||||
reads.add(r2);
|
|
||||||
reads.add(r3);
|
|
||||||
reads.add(r4);
|
|
||||||
reads.add(r5);
|
|
||||||
reads.add(r6);
|
|
||||||
reads.add(r7);
|
|
||||||
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
|
|
||||||
}
|
|
||||||
|
|
||||||
private void testCleanWithDeletion() {
|
|
||||||
String reference = "AAAAAACCCTTCCCAAAAAA";
|
|
||||||
// the alternate reference is: "AAAAAACCCCCCAAAAAA";
|
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
|
||||||
SAMFileHeader header = getToolkit().getSAMFileHeader();
|
|
||||||
SAMRecord r1 = new SAMRecord(header);
|
|
||||||
r1.setReadName("1");
|
|
||||||
r1.setReadString("ACCCTTCC");
|
|
||||||
r1.setAlignmentStart(5);
|
|
||||||
r1.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r2 = new SAMRecord(header);
|
|
||||||
r2.setReadName("2");
|
|
||||||
r2.setReadString("AAAACCCC");
|
|
||||||
r2.setAlignmentStart(2);
|
|
||||||
r2.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r3 = new SAMRecord(header);
|
|
||||||
r3.setReadName("3");
|
|
||||||
r3.setReadString("CCCC");
|
|
||||||
r3.setAlignmentStart(6);
|
|
||||||
r3.setBaseQualityString("BBBB");
|
|
||||||
SAMRecord r4 = new SAMRecord(header);
|
|
||||||
r4.setReadName("4");
|
|
||||||
r4.setReadString("CCCCAA");
|
|
||||||
r4.setAlignmentStart(10);
|
|
||||||
r4.setBaseQualityString("BBBBBB");
|
|
||||||
SAMRecord r5 = new SAMRecord(header);
|
|
||||||
r5.setReadName("5");
|
|
||||||
r5.setReadString("AAAGAACC");
|
|
||||||
r5.setAlignmentStart(0);
|
|
||||||
r5.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r6 = new SAMRecord(header);
|
|
||||||
r6.setReadName("6");
|
|
||||||
r6.setReadString("CCAAAGAA");
|
|
||||||
r6.setAlignmentStart(10);
|
|
||||||
r6.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r7 = new SAMRecord(header);
|
|
||||||
r7.setReadName("7");
|
|
||||||
r7.setReadString("AAAACCCG");
|
|
||||||
r7.setAlignmentStart(2);
|
|
||||||
r7.setBaseQualityString("BBBBBBBB");
|
|
||||||
SAMRecord r8 = new SAMRecord(header);
|
|
||||||
r8.setReadName("8");
|
|
||||||
r8.setReadString("AACCCCCC");
|
|
||||||
r8.setAlignmentStart(4);
|
|
||||||
r8.setBaseQualityString("BBBBBBBB");
|
|
||||||
reads.add(r1);
|
|
||||||
reads.add(r2);
|
|
||||||
reads.add(r3);
|
|
||||||
reads.add(r4);
|
|
||||||
reads.add(r5);
|
|
||||||
reads.add(r6);
|
|
||||||
reads.add(r7);
|
|
||||||
reads.add(r8);
|
|
||||||
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,21 +9,21 @@ public class IntervalCleanerIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testIntervals() {
|
public void testIntervals() {
|
||||||
|
|
||||||
String[] md5lod5 = {"163d2f1b04741986151636d3c7e04c94", "4aa3637e86822c95af3e2c9b414530c3"};
|
String[] md5lod5 = {"042c1c2649a51a260bc204ec5f256c1a", "460631e8d98644dcf53b3045ca40f02a"};
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
"-T IntervalCleaner -LOD 5 -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
|
"-T IntervalCleaner -LOD 5 -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5lod5));
|
Arrays.asList(md5lod5));
|
||||||
executeTest("testLod5", spec1);
|
executeTest("testLod5", spec1);
|
||||||
|
|
||||||
String[] md5lod200 = {"1d89ee2af03df79eb5de494c77767221", "e39aa718c9810364ebe30964d878d5ff"};
|
String[] md5lod200 = {"1d89ee2af03df79eb5de494c77767221", "6137bf0c25c7972b07b0d3fc6979cf5b"};
|
||||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
"-T IntervalCleaner -LOD 200 -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
|
"-T IntervalCleaner -LOD 200 -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
|
||||||
2,
|
2,
|
||||||
Arrays.asList(md5lod200));
|
Arrays.asList(md5lod200));
|
||||||
executeTest("testLod200", spec2);
|
executeTest("testLod200", spec2);
|
||||||
|
|
||||||
String[] md5cleanedOnly = {"710f9114ec496c73f1c3782b5cb09757", "4aa3637e86822c95af3e2c9b414530c3"};
|
String[] md5cleanedOnly = {"168c8d02fceb107477381b93d189fe1f", "460631e8d98644dcf53b3045ca40f02a"};
|
||||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||||
"-T IntervalCleaner -LOD 5 -cleanedOnly -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
|
"-T IntervalCleaner -LOD 5 -cleanedOnly -maxConsensuses 100 -greedy 100 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chrom1.SLX.SRP000032.2009_06.bam -L /humgen/gsa-scr1/GATK_Data/Validation_Data/cleaner.test.intervals --OutputCleaned %s -snps %s",
|
||||||
2,
|
2,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue