Rods for reads hooked up into the cleaner

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3070 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-24 18:17:56 +00:00
parent 5079f35e40
commit 47e30aba92
2 changed files with 37 additions and 9 deletions

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule; import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule;
import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.*;
@ -92,7 +93,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// the reads and known indels that fall into the current interval // the reads and known indels that fall into the current interval
private final ReadBin readsToClean = new ReadBin(); private final ReadBin readsToClean = new ReadBin();
private final ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>(); private final ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>();
private final ArrayList<VariationRod> knownIndelsToTry = new ArrayList<VariationRod>(); private final IdentityHashMap<ReferenceOrderedDatum, VariantContext> knownIndelsToTry = new IdentityHashMap<ReferenceOrderedDatum, VariantContext>();
// the wrapper around the SAM writer // the wrapper around the SAM writer
private Map<String, SAMFileWriter> writers = null; private Map<String, SAMFileWriter> writers = null;
@ -261,6 +262,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
readsNotToClean.add(read); readsNotToClean.add(read);
} else { } else {
readsToClean.add(read, ref); readsToClean.add(read, ref);
// add the rods to the list of known variants
populateKnownIndels(metaDataTracker);
} }
if ( readsToClean.size() + readsNotToClean.size() >= MAX_READS ) { if ( readsToClean.size() + readsNotToClean.size() >= MAX_READS ) {
@ -344,6 +347,21 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
} }
} }
private void populateKnownIndels(ReadMetaDataTracker metaDataTracker) {
for ( Collection<ReferenceOrderedDatum> rods : metaDataTracker.getContigOffsetMapping().values() ) {
Iterator<ReferenceOrderedDatum> rodIter = rods.iterator();
while ( rodIter.hasNext() ) {
ReferenceOrderedDatum rod = rodIter.next();
if ( knownIndelsToTry.containsKey(rod) )
continue;
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rod))
knownIndelsToTry.put(rod, VariantContextAdaptors.toVariantContext("", rod));
else
knownIndelsToTry.put(rod, null);
}
}
}
private static int mismatchQualitySumIgnoreCigar(final AlignedRead aRead, final byte[] refSeq, int refIndex, int quitAboveThisValue) { private static int mismatchQualitySumIgnoreCigar(final AlignedRead aRead, final byte[] refSeq, int refIndex, int quitAboveThisValue) {
final byte[] readSeq = aRead.getRead().getReadBases(); final byte[] readSeq = aRead.getRead().getReadBases();
final byte[] quals = aRead.getRead().getBaseQualities(); final byte[] quals = aRead.getRead().getBaseQualities();
@ -394,8 +412,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
long totalAlignerMismatchSum = 0, totalRawMismatchSum = 0; long totalAlignerMismatchSum = 0, totalRawMismatchSum = 0;
// if there are any known indels for this region, get them // if there are any known indels for this region, get them
for ( VariationRod knownIndel : knownIndelsToTry ) { for ( VariantContext knownIndel : knownIndelsToTry.values() ) {
String indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAlleleList().get(0) : Utils.dupString('-', knownIndel.getAlleleList().get(0).length()); if ( knownIndel == null || !knownIndel.isIndel() )
continue;
byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
Consensus c = createAlternateConsensus((int)(knownIndel.getLocation().getStart() - leftmostIndex), reference, indelStr, knownIndel.isDeletion()); Consensus c = createAlternateConsensus((int)(knownIndel.getLocation().getStart() - leftmostIndex), reference, indelStr, knownIndel.isDeletion());
if ( c != null ) if ( c != null )
altConsenses.add(c); altConsenses.add(c);
@ -410,7 +430,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// System.out.println(read.getReadString()); // System.out.println(read.getReadString());
// } // }
// we currently can not deal with clipped reads correctly (or screwy record) // we currently can not deal with clipped reads correctly (or a screwy record)
if ( read.getCigar().numCigarElements() == 0 || readIsClipped(read) ) { if ( read.getCigar().numCigarElements() == 0 || readIsClipped(read) ) {
refReads.add(read); refReads.add(read);
continue; continue;
@ -630,6 +650,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
} }
} }
// create a Consensus from cigar/read strings which originate somewhere on the reference
private Consensus createAlternateConsensus(final int indexOnRef, final Cigar c, final byte[] reference, final byte[] readStr) { private Consensus createAlternateConsensus(final int indexOnRef, final Cigar c, final byte[] reference, final byte[] readStr) {
if ( indexOnRef < 0 ) if ( indexOnRef < 0 )
return null; return null;
@ -687,7 +708,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
return new Consensus(altConsensus, c, indexOnRef); return new Consensus(altConsensus, c, indexOnRef);
} }
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final String indelStr, final boolean isDeletion) { // create a Consensus from just the indel string that falls on the reference
private Consensus createAlternateConsensus(final int indexOnRef, final byte[] reference, final byte[] indelStr, final boolean isDeletion) {
if ( indexOnRef < 0 ) if ( indexOnRef < 0 )
return null; return null;
@ -702,12 +724,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
cigar.add(new CigarElement(indexOnRef, CigarOperator.M)); cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
if ( isDeletion ) { if ( isDeletion ) {
refIdx += indelStr.length(); refIdx += indelStr.length;
cigar.add(new CigarElement(indelStr.length(), CigarOperator.D)); cigar.add(new CigarElement(indelStr.length, CigarOperator.D));
} }
else { else {
sb.append(indelStr); sb.append(indelStr);
cigar.add(new CigarElement(indelStr.length(), CigarOperator.I)); cigar.add(new CigarElement(indelStr.length, CigarOperator.I));
} }
if ( reference.length - refIdx > 0 ) if ( reference.length - refIdx > 0 )
@ -716,7 +738,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
sb.append((char)reference[refIdx]); sb.append((char)reference[refIdx]);
byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the current read byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the current read
return new Consensus(altConsensus, cigar, indexOnRef); return new Consensus(altConsensus, cigar, 0);
} }
private Pair<Integer, Integer> findBestOffset(final byte[] ref, final AlignedRead read, final int leftmostIndex) { private Pair<Integer, Integer> findBestOffset(final byte[] ref, final AlignedRead read, final int leftmostIndex) {

View File

@ -425,6 +425,12 @@ public class Utils {
return new String(chars); return new String(chars);
} }
public static byte[] dupBytes(byte b, int nCopies) {
byte[] bytes = new byte[nCopies];
Arrays.fill(bytes, b);
return bytes;
}
public static int countOccurrences(char c, String s) { public static int countOccurrences(char c, String s) {
int count = 0; int count = 0;
for (int i = 0; i < s.length(); i++) { for (int i = 0; i < s.length(); i++) {