The new cleaner can now use known indels to create alternate consensuses for cleaning.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2816 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0250338ce7
commit
6652b992f7
|
|
@ -151,10 +151,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
generator = new Random(RANDOM_SEED);
|
generator = new Random(RANDOM_SEED);
|
||||||
|
|
||||||
// set up the rods (since this is a ReadWalker we don't get rods from the traversal)
|
// set up the rods (since this is a ReadWalker we don't get rods from the traversal)
|
||||||
|
logger.info("Reading and parsing known indel rod files...");
|
||||||
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
|
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
|
||||||
ReferenceOrderedData.parseBindings(knownIndels, rods);
|
ReferenceOrderedData.parseBindings(knownIndels, rods);
|
||||||
for ( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods ) {
|
for ( ReferenceOrderedData<? extends ReferenceOrderedDatum> rod : rods ) {
|
||||||
if ( !(rod instanceof VariationRod) )
|
if ( !VariationRod.class.isAssignableFrom(rod.getType()) )
|
||||||
continue;
|
continue;
|
||||||
SeekableRODIterator<? extends ReferenceOrderedDatum> iter = rod.iterator();
|
SeekableRODIterator<? extends ReferenceOrderedDatum> iter = rod.iterator();
|
||||||
while ( iter.hasNext() ) {
|
while ( iter.hasNext() ) {
|
||||||
|
|
@ -165,6 +166,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
logger.info("Finished reading and parsing known indel rod files");
|
||||||
|
|
||||||
if ( OUT_INDELS != null ) {
|
if ( OUT_INDELS != null ) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -251,6 +253,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart());
|
||||||
|
|
||||||
if ( readLoc.isBefore(currentInterval) || Utils.is454Read(read) ) {
|
if ( readLoc.isBefore(currentInterval) || Utils.is454Read(read) ) {
|
||||||
|
// TODO -- it would be nice if we could use indels from 454 reads as alternate consenses
|
||||||
emit(read);
|
emit(read);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -391,6 +394,21 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
||||||
int totalMismatchSum = 0;
|
int totalMismatchSum = 0;
|
||||||
|
|
||||||
|
// if there are any known indels for this region, get them
|
||||||
|
while ( knownIndelsToTry.size() > 0 ) {
|
||||||
|
VariationRod knownIndel = knownIndelsToTry.first();
|
||||||
|
if ( knownIndel.getLocation().isBefore(readsToClean.getLocation()) ) {
|
||||||
|
knownIndelsToTry.remove(knownIndel);
|
||||||
|
} else if ( knownIndel.getLocation().overlapsP(readsToClean.getLocation()) ) {
|
||||||
|
knownIndelsToTry.remove(knownIndel);
|
||||||
|
String indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAlleleList().get(0) : Utils.dupString('-', knownIndel.getAlleleList().get(0).length());
|
||||||
|
Consensus c = createAlternateConsensus((int)(knownIndel.getLocation().getStart() - leftmostIndex), reference, indelStr, knownIndel.isDeletion());
|
||||||
|
if ( c != null )
|
||||||
|
altConsenses.add(c);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// decide which reads potentially need to be cleaned
|
// decide which reads potentially need to be cleaned
|
||||||
for ( SAMRecord read : reads ) {
|
for ( SAMRecord read : reads ) {
|
||||||
|
|
@ -653,13 +671,43 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
|
|
||||||
for (int i = refIdx; i < reference.length; i++)
|
for (int i = refIdx; i < reference.length; i++)
|
||||||
sb.append((char)reference[i]);
|
sb.append((char)reference[i]);
|
||||||
byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the cuurent read
|
byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the current read
|
||||||
|
|
||||||
// if ( debugOn ) System.out.println("Alt consensus generated: "+altConsensus);
|
|
||||||
|
|
||||||
return new Consensus(altConsensus, c, indexOnRef);
|
return new Consensus(altConsensus, c, indexOnRef);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds an alternate consensus by applying a single indel to the reference bases.
//
// indexOnRef  0-based offset into `reference` at which the indel is applied; a negative value yields null
// reference   reference bases for the region being cleaned
// indelStr    for an insertion, the bases to insert; for a deletion only its length is used
// isDeletion  true to skip indelStr.length() reference bases, false to insert indelStr
//
// Returns a Consensus holding the new sequence, its cigar (M, then D or I, then M) and indexOnRef,
// or null when indexOnRef is negative.
//
// NOTE(review): there is no upper-bound check on indexOnRef; the first copy loop reads
// reference[refIdx] for every refIdx < indexOnRef, so a value larger than reference.length
// would read past the array -- confirm that callers guarantee the indel lies inside the reference.
private Consensus createAlternateConsensus(int indexOnRef, byte[] reference, String indelStr, boolean isDeletion) {
|
||||||
|
if ( indexOnRef < 0 ) // the indel starts before the first reference base we hold
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// create the new consensus: sequence goes into sb, its alignment to the reference into cigar
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
Cigar cigar = new Cigar();
|
||||||
|
int refIdx;
|
||||||
|
|
||||||
|
for (refIdx = 0; refIdx < indexOnRef; refIdx++) // copy the reference bases that precede the indel
|
||||||
|
sb.append((char)reference[refIdx]);
|
||||||
|
if ( indexOnRef > 0 ) // no leading M element when the indel sits at offset 0
|
||||||
|
cigar.add(new CigarElement(indexOnRef, CigarOperator.M));
|
||||||
|
|
||||||
|
if ( isDeletion ) {
|
||||||
|
refIdx += indelStr.length(); // skip the deleted reference bases; nothing is appended to sb
|
||||||
|
cigar.add(new CigarElement(indelStr.length(), CigarOperator.D));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
sb.append(indelStr); // inserted bases go into the consensus; refIdx does not advance
|
||||||
|
cigar.add(new CigarElement(indelStr.length(), CigarOperator.I));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( reference.length - refIdx > 0 ) // trailing M element only if reference bases remain after the indel
|
||||||
|
cigar.add(new CigarElement(reference.length - refIdx, CigarOperator.M));
|
||||||
|
for (; refIdx < reference.length; refIdx++) // copy the remaining reference bases
|
||||||
|
sb.append((char)reference[refIdx]);
|
||||||
|
byte[] altConsensus = StringUtil.stringToBytes(sb.toString()); // alternative consensus sequence we just built from the reference and the supplied indel
|
||||||
|
|
||||||
|
return new Consensus(altConsensus, cigar, indexOnRef);
|
||||||
|
}
|
||||||
|
|
||||||
private Pair<Integer, Integer> findBestOffset(byte[] ref, AlignedRead read) {
|
private Pair<Integer, Integer> findBestOffset(byte[] ref, AlignedRead read) {
|
||||||
int attempts = ref.length - read.getReadLength() + 1;
|
int attempts = ref.length - read.getReadLength() + 1;
|
||||||
int bestScore = mismatchQualitySumIgnoreCigar(read, ref, 0);
|
int bestScore = mismatchQualitySumIgnoreCigar(read, ref, 0);
|
||||||
|
|
@ -1162,6 +1210,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
||||||
System.arraycopy(reference, 0, newReference, 0, reference.length);
|
System.arraycopy(reference, 0, newReference, 0, reference.length);
|
||||||
System.arraycopy(ref, ref.length-neededBases, newReference, reference.length, neededBases);
|
System.arraycopy(ref, ref.length-neededBases, newReference, reference.length, neededBases);
|
||||||
reference = newReference;
|
reference = newReference;
|
||||||
|
loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()+neededBases);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue