-set indel cleaning walkers to be in core package
-move Andrey's alignment utility classes to core git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1307 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bb20462a7c
commit
59f0c00d77
|
|
@ -1,10 +1,9 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMFileWriter;
|
import net.sf.samtools.SAMFileWriter;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.playground.indels.*;
|
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
|
||||||
|
|
@ -338,7 +337,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
} catch (Exception e) {}
|
} catch (Exception e) {}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.debug("CLEAN: " + cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
|
logger.debug("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str );
|
||||||
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) {
|
||||||
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
|
// NOTE: indels are printed out in the format specified for the low-coverage pilot1
|
||||||
// indel calls (tab-delimited): chr position size type sequence
|
// indel calls (tab-delimited): chr position size type sequence
|
||||||
|
|
@ -421,7 +420,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
// create the new consensus
|
// create the new consensus
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
sb.append(reference.substring(0, indexOnRef));
|
sb.append(reference.substring(0, indexOnRef));
|
||||||
logger.debug("CIGAR = " + cigarToString(c));
|
logger.debug("CIGAR = " + AlignmentUtils.cigarToString(c));
|
||||||
|
|
||||||
int indelCount = 0;
|
int indelCount = 0;
|
||||||
int altIdx = 0;
|
int altIdx = 0;
|
||||||
|
|
@ -673,7 +672,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
int difference = 0; // we can move indel 'difference' bases left
|
int difference = 0; // we can move indel 'difference' bases left
|
||||||
final int indel_length = ce2.getLength();
|
final int indel_length = ce2.getLength();
|
||||||
|
|
||||||
String indelString = null; // inserted or deleted sequence
|
String indelString; // inserted or deleted sequence
|
||||||
int period = 0; // period of the inserted/deleted sequence
|
int period = 0; // period of the inserted/deleted sequence
|
||||||
int indelIndexOnRef = refIndex+ce1.getLength() ; // position of the indel on the REF (first deleted base or first base after insertion)
|
int indelIndexOnRef = refIndex+ce1.getLength() ; // position of the indel on the REF (first deleted base or first base after insertion)
|
||||||
int indelIndexOnRead = readIndex+ce1.getLength(); // position of the indel on the READ (first insterted base, of first base after deletion)
|
int indelIndexOnRead = readIndex+ce1.getLength(); // position of the indel on the READ (first insterted base, of first base after deletion)
|
||||||
|
|
@ -753,7 +752,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
// if ( ce2.getLength() >=2 )
|
// if ( ce2.getLength() >=2 )
|
||||||
// System.out.println(" REALIGNED TO:\n"+AlignmentUtils.alignmentToString(newCigar,readSeq,refSeq,refIndex,readIndex)+"\n");
|
// System.out.println(" REALIGNED TO:\n"+AlignmentUtils.alignmentToString(newCigar,readSeq,refSeq,refIndex,readIndex)+"\n");
|
||||||
|
|
||||||
logger.debug("Realigning indel: " + cigarToString(cigar) + " to " + cigarToString(newCigar));
|
logger.debug("Realigning indel: " + AlignmentUtils.cigarToString(cigar) + " to " + AlignmentUtils.cigarToString(newCigar));
|
||||||
cigar = newCigar;
|
cigar = newCigar;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -794,7 +793,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
if ( getCigar().equals(cigar) )
|
if ( getCigar().equals(cigar) )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
String str = cigarToString(cigar);
|
String str = AlignmentUtils.cigarToString(cigar);
|
||||||
if ( !str.contains("D") && !str.contains("I") )
|
if ( !str.contains("D") && !str.contains("I") )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
@ -992,23 +991,4 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
reads.add(r8);
|
reads.add(r8);
|
||||||
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
|
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String cigarToString(Cigar cig) {
|
|
||||||
if ( cig == null )
|
|
||||||
return "null";
|
|
||||||
|
|
||||||
StringBuilder b = new StringBuilder();
|
|
||||||
|
|
||||||
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
|
|
||||||
char c='?';
|
|
||||||
switch ( cig.getCigarElement(i).getOperator() ) {
|
|
||||||
case M : c = 'M'; break;
|
|
||||||
case D : c = 'D'; break;
|
|
||||||
case I : c = 'I'; break;
|
|
||||||
}
|
|
||||||
b.append(cig.getCigarElement(i).getLength());
|
|
||||||
b.append(c);
|
|
||||||
}
|
|
||||||
return b.toString();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.playground.indels;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
@ -6,7 +6,6 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||||
import org.broadinstitute.sting.utils.*;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -176,103 +175,6 @@ public class AlignmentUtils {
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Reads through the alignment cigar and returns all indels found in the alignment as a collection
|
|
||||||
* of Indel objects.
|
|
||||||
* @param c alignment cigar
|
|
||||||
* @param start alignment start. NOTE: if cigar starts with clipped bases (S), alignment start position should be set after them according to SAM format specification.
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static Collection<Indel> extractIndels(final Cigar c, final int start) {
|
|
||||||
//
|
|
||||||
// firstpos,lastpos span of the indel will be interpreted as follows:
|
|
||||||
// any alignment that ends strictly before firstpos or starts strictly after lastpos
|
|
||||||
// on the *reference* (not inclusive!) does not overlap with an indel; in the case of
|
|
||||||
// insertion it will result in firstpos > lastpos!
|
|
||||||
// lastpos
|
|
||||||
// | firstpos
|
|
||||||
// | |
|
|
||||||
// v v
|
|
||||||
// ---------III----- Ref Insertion: bases I are not in the ref; any alignment that starts
|
|
||||||
// after lastpos or ends before firstpos *on the reference*
|
|
||||||
// is completely over the reference bases to the right or to
|
|
||||||
// the left, respectively, of the insertion site
|
|
||||||
//
|
|
||||||
// firstpos
|
|
||||||
// | lastpos
|
|
||||||
// | |
|
|
||||||
// v v
|
|
||||||
//------------------ Ref Deletion: any alignment that ends before firstpos or starts after lastpos
|
|
||||||
// -----DDD--- alignment on the reference does not overlap with the deletion
|
|
||||||
int runninglength = start; // position on the original reference; start = alignment start position
|
|
||||||
|
|
||||||
List<Indel> indels = new ArrayList<Indel>(4);
|
|
||||||
|
|
||||||
if ( c.numCigarElements() <= 1 ) return indels; // most of the reads have no indels, save a few cycles by returning early
|
|
||||||
|
|
||||||
for ( int i = 0 ; i < c.numCigarElements() ; i++ ) {
|
|
||||||
|
|
||||||
final CigarElement ce = c.getCigarElement(i);
|
|
||||||
Indel curr_indel = null;
|
|
||||||
|
|
||||||
switch(ce.getOperator()) {
|
|
||||||
case I:
|
|
||||||
curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I);
|
|
||||||
if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
|
|
||||||
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
|
|
||||||
break;
|
|
||||||
case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D);
|
|
||||||
if ( i == 0 ) System.out.println("WARNING: Indel at start of the read");
|
|
||||||
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read");
|
|
||||||
runninglength += ce.getLength();
|
|
||||||
break;
|
|
||||||
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
|
|
||||||
case H:
|
|
||||||
case S: break; // just skip hard and soft-clipped bases; according to SAM format specification, alignment start position on the reference
|
|
||||||
// points to where the actually aligned (not clipped) bases go, so we do not need to increment reference position here
|
|
||||||
default :
|
|
||||||
throw new IllegalArgumentException("Unexpected operator in cigar string: "+ce.getOperator());
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( curr_indel == null ) continue; // element was not an indel, go grab next element...
|
|
||||||
|
|
||||||
indels.add(curr_indel); // this is a new indel. Add it.
|
|
||||||
} // end for loop over all alignment cigar elements
|
|
||||||
|
|
||||||
return indels;
|
|
||||||
} // end extractIndels() method
|
|
||||||
|
|
||||||
/** Reads through the alignment specified in the record and returns all indels found in the alignment as a collection
|
|
||||||
* of Indel objects. If read is not mapped, silently returns an empty collection.
|
|
||||||
* @param r
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static Collection<Indel> extractIndels(SAMRecord r) {
|
|
||||||
if ( r.getReadUnmappedFlag() ) return new ArrayList<Indel>();
|
|
||||||
return extractIndels(r.getCigar(), r.getAlignmentStart());
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Extracts indels from the specified record (@see #extractIndels()) and updates the provided tree object.
|
|
||||||
* Multiple occurences of the same indel (i.e. with support from multiple reads) will be grouped together
|
|
||||||
* in one counted object held by the set.
|
|
||||||
*
|
|
||||||
* @param r
|
|
||||||
* @param t
|
|
||||||
*/
|
|
||||||
public static void collectAndCountIndels(SAMRecord r, TreeSet<CountedObject<Indel> > t) {
|
|
||||||
Collection<Indel> indels = AlignmentUtils.extractIndels(r);
|
|
||||||
for ( Indel ind : indels ) {
|
|
||||||
CountedObject<Indel> ci = new CountedObject<Indel>(ind);
|
|
||||||
CountedObject<Indel> found = t.floor(ci);
|
|
||||||
// CountedObject<Indel> found2 = t.ceiling(ci);
|
|
||||||
|
|
||||||
if ( ci.equals( found ) ) found.increment(); // we did find our indel, advance the counter
|
|
||||||
else t.add(ci); // this is a new indel. Add it.
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String toString(Cigar cig) {
|
public static String toString(Cigar cig) {
|
||||||
StringBuilder b = new StringBuilder();
|
StringBuilder b = new StringBuilder();
|
||||||
|
|
||||||
|
|
@ -294,6 +196,25 @@ public class AlignmentUtils {
|
||||||
return alignmentToString( cigar, seq, ref, posOnRef, 0 );
|
return alignmentToString( cigar, seq, ref, posOnRef, 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String cigarToString(Cigar cig) {
|
||||||
|
if ( cig == null )
|
||||||
|
return "null";
|
||||||
|
|
||||||
|
StringBuilder b = new StringBuilder();
|
||||||
|
|
||||||
|
for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) {
|
||||||
|
char c='?';
|
||||||
|
switch ( cig.getCigarElement(i).getOperator() ) {
|
||||||
|
case M : c = 'M'; break;
|
||||||
|
case D : c = 'D'; break;
|
||||||
|
case I : c = 'I'; break;
|
||||||
|
}
|
||||||
|
b.append(cig.getCigarElement(i).getLength());
|
||||||
|
b.append(c);
|
||||||
|
}
|
||||||
|
return b.toString();
|
||||||
|
}
|
||||||
|
|
||||||
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) {
|
public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) {
|
||||||
int readPos = posOnRead;
|
int readPos = posOnRead;
|
||||||
int refPos = posOnRef;
|
int refPos = posOnRef;
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.playground.indels;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.PrimitivePair;
|
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
Loading…
Reference in New Issue