The new cleaner can now use known indels to create alternate consenses for cleaning.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2816 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0250338ce7
commit
6652b992f7
|
|
@ -151,20 +151,22 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
generator = new Random(RANDOM_SEED);
|
generator = new Random(RANDOM_SEED);
|
||||||
|
|
||||||
// set up the rods (since this is a ReadWalker we don't get rods from the traversal)
|
// set up the rods (since this is a ReadWalker we don't get rods from the traversal)
|
||||||
|
logger.info("Reading and parsing known indel rod files...");
|
||||||
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
|
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
|
||||||
ReferenceOrderedData.parseBindings(knownIndels, rods);
|
ReferenceOrderedData.parseBindings(knownIndels, rods);
|
||||||
for ( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods ) {
|
for ( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods ) {
|
||||||
if ( !(rod instanceof VariationRod) )
|
if ( !VariationRod.class.isAssignableFrom(rod.getType()) )
|
||||||
continue;
|
continue;
|
||||||
SeekableRODIterator<? extends ReferenceOrderedDatum> iter = rod.iterator();
|
SeekableRODIterator<? extends ReferenceOrderedDatum> iter = rod.iterator();
|
||||||
while ( iter.hasNext() ) {
|
while ( iter.hasNext() ) {
|
||||||
RODRecordList<? extends ReferenceOrderedDatum> records = iter.next();
|
RODRecordList<? extends ReferenceOrderedDatum> records = iter.next();
|
||||||
for ( ReferenceOrderedDatum record : records ) {
|
for ( ReferenceOrderedDatum record : records ) {
|
||||||
if (((VariationRod)record).isIndel())
|
if ( ((VariationRod)record).isIndel() )
|
||||||
knownIndelsToTry.add((VariationRod)record);
|
knownIndelsToTry.add((VariationRod)record);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
logger.info("Finished reading and parsing known indel rod files");
|
||||||
|
|
||||||
if ( OUT_INDELS != null ) {
|
if ( OUT_INDELS != null ) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -251,6 +253,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
||||||
|
|
||||||
if ( readLoc.isBefore(currentInterval) || Utils.is454Read(read) ) {
|
if ( readLoc.isBefore(currentInterval) || Utils.is454Read(read) ) {
|
||||||
|
// TODO -- it would be nice if we could use indels from 454 reads as alternate consenses
|
||||||
emit(read);
|
emit(read);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -388,9 +391,24 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
|
ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
|
||||||
LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
|
LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
|
||||||
ArrayList<AlignedRead> leftMovedIndels = new ArrayList<AlignedRead>();
|
ArrayList<AlignedRead> leftMovedIndels = new ArrayList<AlignedRead>();
|
||||||
Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
||||||
int totalMismatchSum = 0;
|
int totalMismatchSum = 0;
|
||||||
|
|
||||||
|
// if there are any known indels for this region, get them
|
||||||
|
while ( knownIndelsToTry.size() > 0 ) {
|
||||||
|
VariationRod knownIndel = knownIndelsToTry.first();
|
||||||
|
if ( knownIndel.getLocation().isBefore(readsToClean.getLocation()) ) {
|
||||||
|
knownIndelsToTry.remove(knownIndel);
|
||||||
|
} else if ( knownIndel.getLocation().overlapsP(readsToClean.getLocation()) ) {
|
||||||
|
knownIndelsToTry.remove(knownIndel);
|
||||||
|
String indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAlleleList().get(0) : Utils.dupString('-', knownIndel.getAlleleList().get(0).length());
|
||||||
|
Consensus c = createAlternateConsensus((int)(knownIndel.getLocation().getStart() - leftmostIndex), reference, indelStr, knownIndel.isDeletion());
|
||||||
|
if ( c != null )
|
||||||
|
altConsenses.add(c);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// decide which reads potentially need to be cleaned
|
// decide which reads potentially need to be cleaned
|
||||||
for ( SAMRecord read : reads ) {
|
for ( SAMRecord read : reads ) {
|
||||||
|
|
@ -430,7 +448,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// if it has an indel, let's see if that's the best consensus
|
// if it has an indel, let's see if that's the best consensus
|
||||||
if ( numBlocks == 2 ) {
|
if ( numBlocks == 2 ) {
|
||||||
Consensus c = createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(aRead.getAlignmentStart() - (int)leftmostIndex, aRead.getCigar(), reference, aRead.getRead().getReadBases());
|
||||||
if ( c==null) {} //System.out.println("ERROR: Failed to create alt consensus for read "+aRead.getRead().getReadName());
|
if ( c == null ) {} //System.out.println("ERROR: Failed to create alt consensus for read "+aRead.getRead().getReadName());
|
||||||
else altConsenses.add(c);
|
else altConsenses.add(c);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -451,7 +469,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
||||||
if ( c != null) {
|
if ( c != null ) {
|
||||||
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
// if ( debugOn ) System.out.println("NEW consensus generated by SW: "+c.str ) ;
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -467,7 +485,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
// do a pairwise alignment against the reference
|
// do a pairwise alignment against the reference
|
||||||
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
SWPairwiseAlignment swConsensus = new SWPairwiseAlignment(reference, aRead.getRead().getReadBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND);
|
||||||
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
Consensus c = createAlternateConsensus(swConsensus.getAlignmentStart2wrt1(), swConsensus.getCigar(), reference, aRead.getRead().getReadBases());
|
||||||
if ( c != null)
|
if ( c != null )
|
||||||
altConsenses.add(c);
|
altConsenses.add(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -653,13 +671,43 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
for (int i = refIdx; i < reference.length; i++)
|
for (int i = refIdx; i < reference.length; i++)
|
||||||
sb.append((char)reference[i]);
|
sb.append((char)reference[i]);
|
||||||
byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the cuurent read
|
byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the current read
|
||||||
|
|
||||||
// if ( debugOn ) System.out.println("Alt consensus generated: "+altConsensus);
|
|
||||||
|
|
||||||
return new Consensus(altConsensus, c, indexOnRef);
|
return new Consensus(altConsensus, c, indexOnRef);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Consensus createAlternateConsensus(int indexOnRef, byte[] reference, String indelStr, boolean isDeletion) {
|
||||||
|
if ( indexOnRef < 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// create the new consensus
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
Cigar cigar = new Cigar();
|
||||||
|
int refIdx;
|
||||||
|
|
||||||
|
for (refIdx = 0; refIdx < indexOnRef; refIdx++)
|
||||||
|
sb.append((char)reference[refIdx]);
|
||||||
|
if ( indexOnRef > 0 )
|
||||||
|
cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
|
||||||
|
|
||||||
|
if ( isDeletion ) {
|
||||||
|
refIdx += indelStr.length();
|
||||||
|
cigar.add(new CigarElement(indelStr.length(), CigarOperator.D));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
sb.append(indelStr);
|
||||||
|
cigar.add(new CigarElement(indelStr.length(), CigarOperator.I));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( reference.length - refIdx > 0 )
|
||||||
|
cigar.add(new CigarElement(reference.length - refIdx, CigarOperator.M));
|
||||||
|
for (; refIdx < reference.length; refIdx++)
|
||||||
|
sb.append((char)reference[refIdx]);
|
||||||
|
byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the current read
|
||||||
|
|
||||||
|
return new Consensus(altConsensus, cigar, indexOnRef);
|
||||||
|
}
|
||||||
|
|
||||||
private Pair<Integer, Integer> findBestOffset(byte[] ref, AlignedRead read) {
|
private Pair<Integer, Integer> findBestOffset(byte[] ref, AlignedRead read) {
|
||||||
int attempts = ref.length - read.getReadLength() + 1;
|
int attempts = ref.length - read.getReadLength() + 1;
|
||||||
int bestScore = mismatchQualitySumIgnoreCigar(read, ref, 0);
|
int bestScore = mismatchQualitySumIgnoreCigar(read, ref, 0);
|
||||||
|
|
@ -1162,6 +1210,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
System.arraycopy(reference, 0, newReference, 0, reference.length);
|
System.arraycopy(reference, 0, newReference, 0, reference.length);
|
||||||
System.arraycopy(ref, ref.length-neededBases, newReference, reference.length, neededBases);
|
System.arraycopy(ref, ref.length-neededBases, newReference, reference.length, neededBases);
|
||||||
reference = newReference;
|
reference = newReference;
|
||||||
|
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()+neededBases);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue