-set indel cleaning walkers to be in core package

-move Andrey's alignment utility classes to core


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1307 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-07-24 05:23:29 +00:00
parent bb20462a7c
commit 59f0c00d77
8 changed files with 32 additions and 133 deletions

View File

@ -1,10 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.indels;
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.indels;
package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.gatk.walkers.indels;
package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.refdata.*;
@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.playground.indels.*;
import net.sf.samtools.*;
@ -338,7 +337,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
} catch (Exception e) {}
}
} else {
logger.debug("CLEAN: " + cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
logger.debug("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
// indel calls (tab-delimited): chr position size type sequence
@ -421,7 +420,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// create the new consensus
StringBuffer sb = new StringBuffer();
sb.append(reference.substring(0, indexOnRef));
logger.debug("CIGAR = " + cigarToString(c));
logger.debug("CIGAR = " + AlignmentUtils.cigarToString(c));
int indelCount = 0;
int altIdx = 0;
@ -673,7 +672,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
int difference = 0; // we can move indel 'difference' bases left
final int indel_length = ce2.getLength();
String indelString = null; // inserted or deleted sequence
String indelString; // inserted or deleted sequence
int period = 0; // period of the inserted/deleted sequence
int indelIndexOnRef = refIndex+ce1.getLength() ; // position of the indel on the REF (first deleted base or first base after insertion)
int indelIndexOnRead = readIndex+ce1.getLength(); // position of the indel on the READ (first insterted base, of first base after deletion)
@ -753,7 +752,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
// if ( ce2.getLength() >=2 )
// System.out.println(" REALIGNED TO:\n"+AlignmentUtils.alignmentToString(newCigar,readSeq,refSeq,refIndex,readIndex)+"\n");
logger.debug("Realigning indel: " + cigarToString(cigar) + " to " + cigarToString(newCigar));
logger.debug("Realigning indel: " + AlignmentUtils.cigarToString(cigar) + " to " + AlignmentUtils.cigarToString(newCigar));
cigar = newCigar;
}
@ -794,7 +793,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
if ( getCigar().equals(cigar) )
return false;
String str = cigarToString(cigar);
String str = AlignmentUtils.cigarToString(cigar);
if ( !str.contains("D") && !str.contains("I") )
return false;
@ -992,23 +991,4 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
reads.add(r8);
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
}
public static String cigarToString(Cigar cig) {
if ( cig == null )
return "null";
StringBuilder b = new StringBuilder();
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
char c='?';
switch ( cig.getCigarElement(i).getOperator() ) {
case M : c = 'M'; break;
case D : c = 'D'; break;
case I : c = 'I'; break;
}
b.append(cig.getCigarElement(i).getLength());
b.append(c);
}
return b.toString();
}
}

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers.indels;
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;

View File

@ -1,5 +1,5 @@
package org.broadinstitute.sting.playground.gatk.walkers.indels;
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.refdata.*;

View File

@ -1,5 +1,5 @@
package org.broadinstitute.sting.playground.gatk.walkers.indels;
package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.LocusContext;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.indels;
package org.broadinstitute.sting.utils;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
@ -6,7 +6,6 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.picard.reference.ReferenceSequence;
import org.broadinstitute.sting.playground.utils.CountedObject;
import org.broadinstitute.sting.utils.*;
import java.util.*;
@ -175,103 +174,6 @@ public class AlignmentUtils {
return n;
}
/** Reads through the alignment cigar and returns all indels found in the alignment as a collection
* of Indel objects.
* @param c alignment cigar
* @param start alignment start. NOTE: if cigar starts with clipped bases (S), alignment start position should be set after them according to SAM format specification.
* @return
*/
public static Collection<Indel> extractIndels(final Cigar c, final int start) {
//
// firstpos,lastpos span of the indel will be interpreted as follows:
// any alignment that ends strictly before firstpos or starts strictly after lastpos
// on the *reference* (not inclusive!) does not overlap with an indel; in the case of
// insertion it will result in firstpos > lastpos!
// lastpos
// | firstpos
// | |
// v v
// ---------III----- Ref Insertion: bases I are not in the ref; any alignment that starts
// after lastpos or ends before firstpos *on the reference*
// is completely over the reference bases to the right or to
// the left, respectively, of the insertion site
//
// firstpos
// | lastpos
// | |
// v v
//------------------ Ref Deletion: any alignment that ends before firstpos or starts after lastpos
// -----DDD--- alignment on the reference does not overlap with the deletion
int runninglength = start; // position on the original reference; start = alignment start position
List<Indel> indels = new ArrayList<Indel>(4);
if ( c.numCigarElements() <= 1 ) return indels; // most of the reads have no indels, save a few cycles by returning early
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
final CigarElement ce = c.getCigarElement(i);
Indel curr_indel = null;
switch(ce.getOperator()) {
case I:
curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I);
if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
break;
case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D);
if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
runninglength += ce.getLength();
break;
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
case H:
case S: break; // just skip hard and soft-clipped bases; according to SAM format specification, alignment start position on the reference
// points to where the actually aligned (not clipped) bases go, so we do not need to increment reference position here
default :
throw new IllegalArgumentException("Unexpected operator in cigar string: "+ce.getOperator());
}
if ( curr_indel == null ) continue; // element was not an indel, go grab next element...
indels.add(curr_indel); // this is a new indel. Add it.
} // end for loop over all alignment cigar elements
return indels;
} // end extractIndels() method
/** Reads through the alignment specified in the record and returns all indels found in the alignment as a collection
* of Indel objects. If read is not mapped, silently returns an empty collection.
* @param r
* @return
*/
public static Collection<Indel> extractIndels(SAMRecord r) {
if ( r.getReadUnmappedFlag() ) return new ArrayList<Indel>();
return extractIndels(r.getCigar(), r.getAlignmentStart());
}
/** Extracts indels from the specified record (@see #extractIndels()) and updates the provided tree object.
* Multiple occurences of the same indel (i.e. with support from multiple reads) will be grouped together
* in one counted object held by the set.
*
* @param r
* @param t
*/
public static void collectAndCountIndels(SAMRecord r, TreeSet<CountedObject<Indel> > t) {
Collection<Indel> indels = AlignmentUtils.extractIndels(r);
for ( Indel ind : indels ) {
CountedObject<Indel> ci = new CountedObject<Indel>(ind);
CountedObject<Indel> found = t.floor(ci);
// CountedObject<Indel> found2 = t.ceiling(ci);
if ( ci.equals( found ) ) found.increment(); // we did find our indel, advance the counter
else t.add(ci); // this is a new indel. Add it.
}
}
public static String toString(Cigar cig) {
StringBuilder b = new StringBuilder();
@ -294,6 +196,25 @@ public class AlignmentUtils {
return alignmentToString( cigar, seq, ref, posOnRef, 0 );
}
public static String cigarToString(Cigar cig) {
if ( cig == null )
return "null";
StringBuilder b = new StringBuilder();
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
char c='?';
switch ( cig.getCigarElement(i).getOperator() ) {
case M : c = 'M'; break;
case D : c = 'D'; break;
case I : c = 'I'; break;
}
b.append(cig.getCigarElement(i).getLength());
b.append(c);
}
return b.toString();
}
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) {
int readPos = posOnRead;
int refPos = posOnRef;

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.playground.indels;
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.PrimitivePair;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.Cigar;