diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java index 2074e1899..79c31ab8d 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/CleanedReadInjector.java @@ -1,10 +1,9 @@ -package org.broadinstitute.sting.playground.gatk.walkers.indels; +package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.Argument; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java index 410e9ff62..e06879097 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelIntervalWalker.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.playground.gatk.walkers.indels; +package org.broadinstitute.sting.gatk.walkers.indels; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java index a9d4dc053..e211287c1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.playground.gatk.walkers.indels; +package org.broadinstitute.sting.gatk.walkers.indels; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.gatk.refdata.*; @@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.playground.indels.*; import net.sf.samtools.*; @@ -338,7 +337,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker } catch (Exception e) {} } } else { - logger.debug("CLEAN: " + cigarToString(bestConsensus.cigar) + " " + bestConsensus.str ); + logger.debug("CLEAN: " + AlignmentUtils.cigarToString(bestConsensus.cigar) + " " + bestConsensus.str ); if ( indelOutput != null && bestConsensus.cigar.numCigarElements() > 1 ) { // NOTE: indels are printed out in the format specified for the low-coverage pilot1 // indel calls (tab-delimited): chr position size type sequence @@ -421,7 +420,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker // create the new consensus StringBuffer sb = new StringBuffer(); sb.append(reference.substring(0, indexOnRef)); - logger.debug("CIGAR = " + cigarToString(c)); + logger.debug("CIGAR = " + AlignmentUtils.cigarToString(c)); int indelCount = 0; int altIdx = 0; @@ -673,7 +672,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker int difference = 0; // we can move indel 'difference' bases left final int indel_length = ce2.getLength(); - String indelString = null; // inserted or deleted sequence + String indelString; // inserted or deleted sequence int period = 0; // period of the inserted/deleted sequence int indelIndexOnRef = refIndex+ce1.getLength() ; // position of the indel on the REF (first deleted base or first base after insertion) int indelIndexOnRead = readIndex+ce1.getLength(); // position of the indel on the READ (first insterted base, of first base after deletion) @@ -753,7 +752,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker // if ( ce2.getLength() >=2 ) // System.out.println(" REALIGNED TO:\n"+AlignmentUtils.alignmentToString(newCigar,readSeq,refSeq,refIndex,readIndex)+"\n"); - logger.debug("Realigning indel: " + cigarToString(cigar) + " to " + cigarToString(newCigar)); + logger.debug("Realigning indel: " + AlignmentUtils.cigarToString(cigar) + " to " + AlignmentUtils.cigarToString(newCigar)); cigar = newCigar; } @@ -794,7 +793,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker if ( getCigar().equals(cigar) ) return false; - String str = cigarToString(cigar); + String str = AlignmentUtils.cigarToString(cigar); if ( !str.contains("D") && !str.contains("I") ) return false; @@ -992,23 +991,4 @@ public class IntervalCleanerWalker extends LocusWindowWalker reads.add(r8); clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0)); } - - public static String cigarToString(Cigar cig) { - if ( cig == null ) - return "null"; - - StringBuilder b = new StringBuilder(); - - for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) { - char c='?'; - switch ( cig.getCigarElement(i).getOperator() ) { - case M : c = 'M'; break; - case D : c = 'D'; break; - case I : c = 'I'; break; - } - b.append(cig.getCigarElement(i).getLength()); - b.append(c); - } - return b.toString(); - } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalMergerWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalMergerWalker.java index 51cb2e95e..9358162c2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalMergerWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalMergerWalker.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.playground.gatk.walkers.indels; +package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java index efa3fb3af..f79fb89c2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/MismatchIntervalWalker.java @@ -1,5 +1,5 @@ -package org.broadinstitute.sting.playground.gatk.walkers.indels; +package org.broadinstitute.sting.gatk.walkers.indels; import net.sf.samtools.*; import org.broadinstitute.sting.gatk.refdata.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java index 64c0965bd..78fe1dd1e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/SNPClusterWalker.java @@ -1,5 +1,5 @@ -package org.broadinstitute.sting.playground.gatk.walkers.indels; +package org.broadinstitute.sting.gatk.walkers.indels; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.LocusContext; diff --git a/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java b/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java similarity index 67% rename from java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java rename to java/src/org/broadinstitute/sting/utils/AlignmentUtils.java index 3f8cc2adb..dd4edf521 100644 --- a/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java +++ b/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.playground.indels; +package org.broadinstitute.sting.utils; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; @@ -6,7 +6,6 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.picard.reference.ReferenceSequence; import org.broadinstitute.sting.playground.utils.CountedObject; -import org.broadinstitute.sting.utils.*; import java.util.*; @@ -175,103 +174,6 @@ public class AlignmentUtils { return n; } - - - /** Reads through the alignment cigar and returns all indels found in the alignment as a collection - * of Indel objects. - * @param c alignment cigar - * @param start alignment start. NOTE: if cigar starts with clipped bases (S), alignment start position should be set after them according to SAM format specification. - * @return - */ - public static Collection extractIndels(final Cigar c, final int start) { - // - // firstpos,lastpos span of the indel will be interpreted as follows: - // any alignment that ends strictly before firstpos or starts strictly after lastpos - // on the *reference* (not inclusive!) does not overlap with an indel; in the case of - // insertion it will result in firstpos > lastpos! - // lastpos - // | firstpos - // | | - // v v - // ---------III----- Ref Insertion: bases I are not in the ref; any alignment that starts - // after lastpos or ends before firstpos *on the reference* - // is completely over the reference bases to the right or to - // the left, respectively, of the insertion site - // - // firstpos - // | lastpos - // | | - // v v - //------------------ Ref Deletion: any alignment that ends before firstpos or starts after lastpos - // -----DDD--- alignment on the reference does not overlap with the deletion - int runninglength = start; // position on the original reference; start = alignment start position - - List indels = new ArrayList(4); - - if ( c.numCigarElements() <= 1 ) return indels; // most of the reads have no indels, save a few cycles by returning early - - for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { - - final CigarElement ce = c.getCigarElement(i); - Indel curr_indel = null; - - switch(ce.getOperator()) { - case I: - curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I); - if ( i == 0 ) System.out.println("WARNING: Indel at start of the read"); - if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read"); - break; - case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D); - if ( i == 0 ) System.out.println("WARNING: Indel at start of the read"); - if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end of the read"); - runninglength += ce.getLength(); - break; - case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment - case H: - case S: break; // just skip hard and soft-clipped bases; according to SAM format specification, alignment start position on the reference - // points to where the actually aligned (not clipped) bases go, so we do not need to increment reference position here - default : - throw new IllegalArgumentException("Unexpected operator in cigar string: "+ce.getOperator()); - } - - if ( curr_indel == null ) continue; // element was not an indel, go grab next element... - - indels.add(curr_indel); // this is a new indel. Add it. - } // end for loop over all alignment cigar elements - - return indels; - } // end extractIndels() method - - /** Reads through the alignment specified in the record and returns all indels found in the alignment as a collection - * of Indel objects. If read is not mapped, silently returns an empty collection. - * @param r - * @return - */ - public static Collection extractIndels(SAMRecord r) { - if ( r.getReadUnmappedFlag() ) return new ArrayList(); - return extractIndels(r.getCigar(), r.getAlignmentStart()); - } - - /** Extracts indels from the specified record (@see #extractIndels()) and updates the provided tree object. - * Multiple occurences of the same indel (i.e. with support from multiple reads) will be grouped together - * in one counted object held by the set. - * - * @param r - * @param t - */ - public static void collectAndCountIndels(SAMRecord r, TreeSet > t) { - Collection indels = AlignmentUtils.extractIndels(r); - for ( Indel ind : indels ) { - CountedObject ci = new CountedObject(ind); - CountedObject found = t.floor(ci); -// CountedObject found2 = t.ceiling(ci); - - if ( ci.equals( found ) ) found.increment(); // we did find our indel, advance the counter - else t.add(ci); // this is a new indel. Add it. - - } - - } public static String toString(Cigar cig) { StringBuilder b = new StringBuilder(); @@ -294,6 +196,25 @@ public class AlignmentUtils { return alignmentToString( cigar, seq, ref, posOnRef, 0 ); } + public static String cigarToString(Cigar cig) { + if ( cig == null ) + return "null"; + + StringBuilder b = new StringBuilder(); + + for ( int i = 0 ; i < cig.numCigarElements() ; i++ ) { + char c='?'; + switch ( cig.getCigarElement(i).getOperator() ) { + case M : c = 'M'; break; + case D : c = 'D'; break; + case I : c = 'I'; break; + } + b.append(cig.getCigarElement(i).getLength()); + b.append(c); + } + return b.toString(); + } + public static String alignmentToString(final Cigar cigar,final String seq, final String ref, final int posOnRef, final int posOnRead ) { int readPos = posOnRead; int refPos = posOnRef; diff --git a/java/src/org/broadinstitute/sting/playground/indels/SWPairwiseAlignment.java b/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java similarity index 97% rename from java/src/org/broadinstitute/sting/playground/indels/SWPairwiseAlignment.java rename to java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java index a225b5016..2f31b4cc7 100755 --- a/java/src/org/broadinstitute/sting/playground/indels/SWPairwiseAlignment.java +++ b/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java @@ -1,6 +1,5 @@ -package org.broadinstitute.sting.playground.indels; +package org.broadinstitute.sting.utils; -import org.broadinstitute.sting.utils.PrimitivePair; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.Cigar;