-set indel cleaning walkers to be in core package
-move Andrey's alignment utility classes to core git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1307 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bb20462a7c
commit
59f0c00d77
|
|
@ -1,10 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.LocusContext;
|
|||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.playground.indels.*;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
|
||||
|
|
@ -338,7 +337,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
} catch (Exception e) {}
|
||||
}
|
||||
} else {
|
||||
logger.debug("CLEAN: " + cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
|
||||
logger.debug("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
|
||||
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
||||
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
|
||||
// indel calls (tab-delimited): chr position size type sequence
|
||||
|
|
@ -421,7 +420,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
// create the new consensus
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(reference.substring(0, indexOnRef));
|
||||
logger.debug("CIGAR = " + cigarToString(c));
|
||||
logger.debug("CIGAR = " + AlignmentUtils.cigarToString(c));
|
||||
|
||||
int indelCount = 0;
|
||||
int altIdx = 0;
|
||||
|
|
@ -673,7 +672,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
int difference = 0; // we can move indel 'difference' bases left
|
||||
final int indel_length = ce2.getLength();
|
||||
|
||||
String indelString = null; // inserted or deleted sequence
|
||||
String indelString; // inserted or deleted sequence
|
||||
int period = 0; // period of the inserted/deleted sequence
|
||||
int indelIndexOnRef = refIndex+ce1.getLength() ; // position of the indel on the REF (first deleted base or first base after insertion)
|
||||
int indelIndexOnRead = readIndex+ce1.getLength(); // position of the indel on the READ (first insterted base, of first base after deletion)
|
||||
|
|
@ -753,7 +752,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
// if ( ce2.getLength() >=2 )
|
||||
// System.out.println(" REALIGNED TO:\n"+AlignmentUtils.alignmentToString(newCigar,readSeq,refSeq,refIndex,readIndex)+"\n");
|
||||
|
||||
logger.debug("Realigning indel: " + cigarToString(cigar) + " to " + cigarToString(newCigar));
|
||||
logger.debug("Realigning indel: " + AlignmentUtils.cigarToString(cigar) + " to " + AlignmentUtils.cigarToString(newCigar));
|
||||
cigar = newCigar;
|
||||
|
||||
}
|
||||
|
|
@ -794,7 +793,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
if ( getCigar().equals(cigar) )
|
||||
return false;
|
||||
|
||||
String str = cigarToString(cigar);
|
||||
String str = AlignmentUtils.cigarToString(cigar);
|
||||
if ( !str.contains("D") && !str.contains("I") )
|
||||
return false;
|
||||
|
||||
|
|
@ -992,23 +991,4 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
reads.add(r8);
|
||||
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
|
||||
}
|
||||
|
||||
public static String cigarToString(Cigar cig) {
|
||||
if ( cig == null )
|
||||
return "null";
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
|
||||
char c='?';
|
||||
switch ( cig.getCigarElement(i).getOperator() ) {
|
||||
case M : c = 'M'; break;
|
||||
case D : c = 'D'; break;
|
||||
case I : c = 'I'; break;
|
||||
}
|
||||
b.append(cig.getCigarElement(i).getLength());
|
||||
b.append(c);
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.indels;
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -6,7 +6,6 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -175,103 +174,6 @@ public class AlignmentUtils {
|
|||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
/** Reads through the alignment cigar and returns all indels found in the alignment as a collection
|
||||
* of Indel objects.
|
||||
* @param c alignment cigar
|
||||
* @param start alignment start. NOTE: if cigar starts with clipped bases (S), alignment start position should be set after them according to SAM format specification.
|
||||
* @return
|
||||
*/
|
||||
public static Collection<Indel> extractIndels(final Cigar c, final int start) {
|
||||
//
|
||||
// firstpos,lastpos span of the indel will be interpreted as follows:
|
||||
// any alignment that ends strictly before firstpos or starts strictly after lastpos
|
||||
// on the *reference* (not inclusive!) does not overlap with an indel; in the case of
|
||||
// insertion it will result in firstpos > lastpos!
|
||||
// lastpos
|
||||
// | firstpos
|
||||
// | |
|
||||
// v v
|
||||
// ---------III----- Ref Insertion: bases I are not in the ref; any alignment that starts
|
||||
// after lastpos or ends before firstpos *on the reference*
|
||||
// is completely over the reference bases to the right or to
|
||||
// the left, respectively, of the insertion site
|
||||
//
|
||||
// firstpos
|
||||
// | lastpos
|
||||
// | |
|
||||
// v v
|
||||
//------------------ Ref Deletion: any alignment that ends before firstpos or starts after lastpos
|
||||
// -----DDD--- alignment on the reference does not overlap with the deletion
|
||||
int runninglength = start; // position on the original reference; start = alignment start position
|
||||
|
||||
List<Indel> indels = new ArrayList<Indel>(4);
|
||||
|
||||
if ( c.numCigarElements() <= 1 ) return indels; // most of the reads have no indels, save a few cycles by returning early
|
||||
|
||||
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
|
||||
|
||||
final CigarElement ce = c.getCigarElement(i);
|
||||
Indel curr_indel = null;
|
||||
|
||||
switch(ce.getOperator()) {
|
||||
case I:
|
||||
curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I);
|
||||
if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
|
||||
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
|
||||
break;
|
||||
case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D);
|
||||
if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
|
||||
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
|
||||
runninglength += ce.getLength();
|
||||
break;
|
||||
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
|
||||
case H:
|
||||
case S: break; // just skip hard and soft-clipped bases; according to SAM format specification, alignment start position on the reference
|
||||
// points to where the actually aligned (not clipped) bases go, so we do not need to increment reference position here
|
||||
default :
|
||||
throw new IllegalArgumentException("Unexpected operator in cigar string: "+ce.getOperator());
|
||||
}
|
||||
|
||||
if ( curr_indel == null ) continue; // element was not an indel, go grab next element...
|
||||
|
||||
indels.add(curr_indel); // this is a new indel. Add it.
|
||||
} // end for loop over all alignment cigar elements
|
||||
|
||||
return indels;
|
||||
} // end extractIndels() method
|
||||
|
||||
/** Reads through the alignment specified in the record and returns all indels found in the alignment as a collection
|
||||
* of Indel objects. If read is not mapped, silently returns an empty collection.
|
||||
* @param r
|
||||
* @return
|
||||
*/
|
||||
public static Collection<Indel> extractIndels(SAMRecord r) {
|
||||
if ( r.getReadUnmappedFlag() ) return new ArrayList<Indel>();
|
||||
return extractIndels(r.getCigar(), r.getAlignmentStart());
|
||||
}
|
||||
|
||||
/** Extracts indels from the specified record (@see #extractIndels()) and updates the provided tree object.
|
||||
* Multiple occurences of the same indel (i.e. with support from multiple reads) will be grouped together
|
||||
* in one counted object held by the set.
|
||||
*
|
||||
* @param r
|
||||
* @param t
|
||||
*/
|
||||
public static void collectAndCountIndels(SAMRecord r, TreeSet<CountedObject<Indel> > t) {
|
||||
Collection<Indel> indels = AlignmentUtils.extractIndels(r);
|
||||
for ( Indel ind : indels ) {
|
||||
CountedObject<Indel> ci = new CountedObject<Indel>(ind);
|
||||
CountedObject<Indel> found = t.floor(ci);
|
||||
// CountedObject<Indel> found2 = t.ceiling(ci);
|
||||
|
||||
if ( ci.equals( found ) ) found.increment(); // we did find our indel, advance the counter
|
||||
else t.add(ci); // this is a new indel. Add it.
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static String toString(Cigar cig) {
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
|
@ -294,6 +196,25 @@ public class AlignmentUtils {
|
|||
return alignmentToString( cigar, seq, ref, posOnRef, 0 );
|
||||
}
|
||||
|
||||
public static String cigarToString(Cigar cig) {
|
||||
if ( cig == null )
|
||||
return "null";
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
|
||||
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
|
||||
char c='?';
|
||||
switch ( cig.getCigarElement(i).getOperator() ) {
|
||||
case M : c = 'M'; break;
|
||||
case D : c = 'D'; break;
|
||||
case I : c = 'I'; break;
|
||||
}
|
||||
b.append(cig.getCigarElement(i).getLength());
|
||||
b.append(c);
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) {
|
||||
int readPos = posOnRead;
|
||||
int refPos = posOnRef;
|
||||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.playground.indels;
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.PrimitivePair;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.Cigar;
|
||||
Loading…
Reference in New Issue