massive changes everywhere; lots of bugs fixed; methods moved around; computation and printout of overall stats added; now decides whether to accept or reject 'improvement'; writes alignments into two output sam files (unmodified reads/failed piles into one, realigned piles into the other); special treat for paranoids: writes third sam file with all the analyzed reads, unmodified
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@197 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0331cd8e95
commit
f47a214f96
|
|
@ -113,8 +113,14 @@ public class AlignmentUtils {
|
|||
Indel curr_indel = null;
|
||||
|
||||
switch(ce.getOperator()) {
|
||||
case I: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I); break;
|
||||
case I:
|
||||
curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.I);
|
||||
if ( i == 0 ) System.out.println("WARNING: Indel at start!");
|
||||
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end!");
|
||||
break;
|
||||
case D: curr_indel = new Indel(runninglength, ce.getLength(), Indel.IndelType.D);
|
||||
if ( i == 0 ) System.out.println("WARNING: Indel at start!");
|
||||
if ( i == c.numCigarElements() - 1) System.out.println("WARNING: Indel at end!");
|
||||
runninglength += ce.getLength();
|
||||
break;
|
||||
case M: runninglength += ce.getLength(); break; // advance along the gapless block in the alignment
|
||||
|
|
@ -152,6 +158,7 @@ public class AlignmentUtils {
|
|||
for ( Indel ind : indels ) {
|
||||
CountedObject<Indel> ci = new CountedObject<Indel>(ind);
|
||||
CountedObject<Indel> found = t.floor(ci);
|
||||
// CountedObject<Indel> found2 = t.ceiling(ci);
|
||||
|
||||
if ( ci.equals( found ) ) found.increment(); // we did find our indel, advance the counter
|
||||
else t.add(ci); // this is a new indel. Add it.
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ public class Indel implements Interval {
|
|||
* @param i Another interval
|
||||
* @return true iff intervals overlap
|
||||
*/
|
||||
@Override
|
||||
|
||||
public boolean overlapsP(Interval i) {
|
||||
return ! disjointP(i); //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
|
@ -126,7 +126,6 @@ public class Indel implements Interval {
|
|||
* @param i Another interval
|
||||
* @return true iff intervals do not overlap
|
||||
*/
|
||||
@Override
|
||||
public boolean disjointP(Interval i) {
|
||||
return i.getStop() < this.getStart() || i.getStart() > this.getStop();
|
||||
}
|
||||
|
|
@ -135,7 +134,6 @@ public class Indel implements Interval {
|
|||
* has length of 0.
|
||||
* @return length of the event on the original, unmodified reference
|
||||
*/
|
||||
@Override
|
||||
public long getLength() {
|
||||
if ( mType == IndelType.I ) return 0;
|
||||
return mLength;
|
||||
|
|
@ -150,6 +148,6 @@ public class Indel implements Interval {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (int)( mStart << 2 + mLength );
|
||||
return (int)( mStart << 6 + mStart + mLength );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,8 +11,6 @@ import javax.swing.filechooser.FileNameExtensionFilter;
|
|||
import edu.mit.broad.picard.cmdline.CommandLineProgram;
|
||||
import edu.mit.broad.picard.cmdline.Option;
|
||||
import edu.mit.broad.picard.cmdline.Usage;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequenceFileFactory;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequenceFileWalker;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
|
||||
|
|
@ -25,6 +23,10 @@ public class IndelInspector extends CommandLineProgram {
|
|||
@Usage(programVersion="1.0") public String USAGE = "Investigates indels called in the alignment data\n";
|
||||
@Option(shortName="I", doc="SAM or BAM file for calling",optional=true) public File INPUT_FILE;
|
||||
@Option(shortName="L",doc="Genomic interval to run on, as contig[:start[-stop]]; whole genome if not specified", optional=true) public String GENOME_LOCATION;
|
||||
@Option(shortName="V",doc="Verbosity level: SILENT, PILESUMMARY, ALIGNMENTS", optional=true) public String VERBOSITY_LEVEL;
|
||||
@Option(doc="Output file (sam or bam) for non-indel related reads and indel reads that were not improved") public String OUT1;
|
||||
@Option(doc="Output file (sam or bam) for improved (realigned) indel related reads") public String OUT2;
|
||||
@Option(doc="[paranoid] Output \"control\" file (sam or bam): all reads picked and processed by this tool will be also saved, unmodified, into this file", optional=true) public String OUTC;
|
||||
@Option(doc="Error counting mode: MM - count mismatches only, ERR - count errors (arachne style), MG - count mismatches and gaps as one error each") public String ERR_MODE;
|
||||
@Option(doc="Maximum number of errors allowed (see ERR_MODE)") public Integer MAX_ERRS;
|
||||
// @Option(shortName="R", doc="Reference fasta or fasta.gz file") public File REF_FILE;
|
||||
|
|
@ -46,7 +48,7 @@ public class IndelInspector extends CommandLineProgram {
|
|||
System.out.println("Unknown value specified for ERR_MODE");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
final SAMFileReader samReader = new SAMFileReader(getInputFile(INPUT_FILE,"/broad/1KG/"));
|
||||
samReader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||
|
||||
|
|
@ -60,12 +62,26 @@ public class IndelInspector extends CommandLineProgram {
|
|||
ReferenceSequence contig_seq = null;
|
||||
|
||||
IndelRecordPileCollector col = null;
|
||||
PileBuilder pileBuilder = new PileBuilder();
|
||||
PassThroughWriter ptWriter = new PassThroughWriter(OUT1,samReader.getFileHeader());
|
||||
PileBuilder pileBuilder = new PileBuilder(OUT2,samReader.getFileHeader(),ptWriter);
|
||||
|
||||
SAMFileWriter controlWriter = null;
|
||||
if ( OUTC != null ) controlWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(samReader.getFileHeader(),false,new File(OUTC));
|
||||
|
||||
try {
|
||||
col = new IndelRecordPileCollector(new DiscardingReceiver(), pileBuilder );
|
||||
col = new IndelRecordPileCollector(ptWriter, pileBuilder, controlWriter );
|
||||
} catch(Exception e) { System.err.println(e.getMessage()); }
|
||||
if ( col == null ) return 1;
|
||||
|
||||
|
||||
if ( VERBOSITY_LEVEL == null ) VERBOSITY_LEVEL = new String("SILENT");
|
||||
if ( VERBOSITY_LEVEL.toUpperCase().equals("SILENT")) pileBuilder.setVerbosity(pileBuilder.SILENT);
|
||||
else if ( VERBOSITY_LEVEL.toUpperCase().equals("PILESUMMARY") ) pileBuilder.setVerbosity(pileBuilder.PILESUMMARY);
|
||||
else if ( VERBOSITY_LEVEL.toUpperCase().equals("ALIGNMENTS") ) pileBuilder.setVerbosity(pileBuilder.ALIGNMENTS);
|
||||
else {
|
||||
System.out.println("Unrecognized VERBOSITY_LEVEL setting.");
|
||||
return 1;
|
||||
}
|
||||
|
||||
String cur_contig = null;
|
||||
int counter = 0;
|
||||
|
||||
|
|
@ -120,9 +136,14 @@ public class IndelInspector extends CommandLineProgram {
|
|||
col.receive(r);
|
||||
|
||||
}
|
||||
|
||||
pileBuilder.printStats();
|
||||
System.out.println("done.");
|
||||
col.printLengthHistograms();
|
||||
samReader.close();
|
||||
pileBuilder.close();
|
||||
ptWriter.close();
|
||||
if ( controlWriter != null ) controlWriter.close();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -89,6 +89,8 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
private RecordReceiver defaultReceiver; // we will send there records that do not overlap with regions of interest
|
||||
private RecordPileReceiver indelPileReceiver; // piles over indel regions will be sent there
|
||||
|
||||
private SAMFileWriter controlWriter;
|
||||
|
||||
private String referenceSequence;
|
||||
|
||||
public String memStatsString() {
|
||||
|
|
@ -97,7 +99,12 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
//+" Bndries="+mIndelRegionStart +":"+ mIndelRegionStop;
|
||||
}
|
||||
|
||||
public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp) throws java.io.IOException {
|
||||
public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp) throws java.io.IOException {
|
||||
this(rr,rp,null);
|
||||
}
|
||||
|
||||
|
||||
public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp, SAMFileWriter cw) throws java.io.IOException {
|
||||
mRecordPile = new LinkedList<SAMRecord>();
|
||||
mAllIndels = new TreeSet<CountedObject<Indel> >(
|
||||
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator()));
|
||||
|
|
@ -113,6 +120,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
defaultReceiver = rr;
|
||||
indelPileReceiver = rp;
|
||||
referenceSequence = null;
|
||||
controlWriter = cw;
|
||||
setWaitState();
|
||||
}
|
||||
|
||||
|
|
@ -146,6 +154,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
SAMRecord r = i.next();
|
||||
if ( r.getAlignmentEnd() <= pos ) {
|
||||
defaultReceiver.receive(r);
|
||||
if ( controlWriter != null ) controlWriter.addAlignment(r);
|
||||
i.remove();
|
||||
} else break;
|
||||
}
|
||||
|
|
@ -161,6 +170,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
SAMRecord r = i.next();
|
||||
if ( r.getAlignmentStart() >= pos ) {
|
||||
defaultReceiver.receive(r);
|
||||
if ( controlWriter != null ) controlWriter.addAlignment(r);
|
||||
i.remove();
|
||||
} else break;
|
||||
}
|
||||
|
|
@ -250,7 +260,10 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
}
|
||||
|
||||
// no indels or avoiding indels in bad region: send all records to defaultReceiver and clear the pile
|
||||
for ( SAMRecord r : mRecordPile ) defaultReceiver.receive(r);
|
||||
for ( SAMRecord r : mRecordPile ) {
|
||||
defaultReceiver.receive(r);
|
||||
if ( controlWriter != null ) controlWriter.addAlignment(r);
|
||||
}
|
||||
setWaitState();
|
||||
return;
|
||||
}
|
||||
|
|
@ -325,7 +338,13 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
System.out.print(finalPile.size() + " reads in the pile;") ;
|
||||
System.out.println(formatRange(finalTrain));
|
||||
indelPileReceiver.receive(finalPile);
|
||||
} else for ( SAMRecord r : finalPile ) defaultReceiver.receive(r);
|
||||
if ( controlWriter != null ) for ( SAMRecord r : finalPile ) controlWriter.addAlignment(r);
|
||||
} else {
|
||||
for ( SAMRecord r : finalPile ) {
|
||||
defaultReceiver.receive(r);
|
||||
controlWriter.addAlignment(r);
|
||||
}
|
||||
}
|
||||
finalPile.clear();
|
||||
finalTrain.clear();
|
||||
curr_stop = -1;
|
||||
|
|
|
|||
|
|
@ -240,7 +240,7 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
/** Returns a (multiline) string that represents the alignment visually: the sequences are appropriately
|
||||
* shifted and ready for printout;
|
||||
*/
|
||||
public String toString(boolean inorder) {
|
||||
public String toString(boolean inorder, boolean dotprint) {
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
java.util.Formatter frmt = new java.util.Formatter(b);
|
||||
|
|
@ -278,8 +278,16 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||
int index = (inorder ? perm[i] : i);
|
||||
frmt.format("%3d:", ext_ids.get(index));
|
||||
skipN(alignment_offsets.get(index)+ first_offset,b);
|
||||
b.append(seqs.get(index));
|
||||
int pos = alignment_offsets.get(index)+ first_offset; // start position on the consensus sequence
|
||||
skipN(pos,b);
|
||||
String aSeq = seqs.get(index);
|
||||
if ( dotprint ) {
|
||||
for ( int j = 0 ; j < aSeq.length() ; j++, pos++ ) {
|
||||
if ( Character.toUpperCase(aSeq.charAt(j)) ==
|
||||
Character.toUpperCase(consensusString[3][pos]) ) b.append('.');
|
||||
else b.append(aSeq.charAt(j));
|
||||
}
|
||||
} else b.append(aSeq);
|
||||
b.append('\n');
|
||||
}
|
||||
// b.append(best_mm+" mismatches, "+ next_mm + " next best, " + getOverlap() + " overlapping bases, distance=" + distance() + "\n");
|
||||
|
|
@ -290,7 +298,7 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
return consensus.getSequence();
|
||||
}
|
||||
|
||||
public String toString() { return toString(true); }
|
||||
public String toString() { return toString(true, false); }
|
||||
|
||||
public int size() { return seqs.size(); }
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,34 @@
|
|||
package org.broadinstitute.sting.playground.indels;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMFileWriterFactory;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Mar 25, 2009
|
||||
* Time: 8:27:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PassThroughWriter implements RecordReceiver {
|
||||
private SAMFileWriter writer;
|
||||
|
||||
public PassThroughWriter( File f, SAMFileHeader h) {
|
||||
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(h, false, f);
|
||||
}
|
||||
|
||||
public PassThroughWriter(String s, SAMFileHeader h) {
|
||||
this(new File(s), h);
|
||||
}
|
||||
|
||||
public void receive(SAMRecord r) {
|
||||
//To change body of implemented methods use File | Settings | File Templates.
|
||||
writer.addAlignment(r);
|
||||
}
|
||||
|
||||
public void close() { writer.close() ; }
|
||||
}
|
||||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.playground.indels;
|
|||
import net.sf.samtools.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
import org.broadinstitute.sting.utils.PrimitivePair;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObjectComparatorAdapter;
|
||||
|
|
@ -17,6 +19,30 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
private String referenceSequence;
|
||||
private int reference_start;
|
||||
|
||||
private int processed_piles = 0;
|
||||
private int improved_piles = 0;
|
||||
private int unmodified_piles = 0;
|
||||
private int failed_piles = 0;
|
||||
private int indels_improved = 0;
|
||||
private int indel_improvement_cnt = 0;
|
||||
private int indels_discarded = 0;
|
||||
private int indels_added = 0;
|
||||
private int indels_added_cnt = 0;
|
||||
private int total_mismatches_count_in_improved = 0;
|
||||
private int total_mismatches_count_in_failed = 0;
|
||||
private int total_improved_mismatches_count = 0;
|
||||
private int total_reads_in_improved = 0;
|
||||
private int total_reads_in_failed = 0;
|
||||
private int total_alignments_modified = 0;
|
||||
|
||||
public final static int SILENT = 0;
|
||||
public final static int PILESUMMARY = 1;
|
||||
public final static int ALIGNMENTS = 2;
|
||||
|
||||
private int mVerbosityLevel = SILENT;
|
||||
|
||||
private SAMFileWriter samWriter;
|
||||
private RecordReceiver failedPileReceiver;
|
||||
|
||||
private static class SelectedPair {
|
||||
private int i_;
|
||||
|
|
@ -68,9 +94,15 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
|
||||
}
|
||||
|
||||
public PileBuilder() {
|
||||
public PileBuilder(File f, SAMFileHeader h, RecordReceiver fr) {
|
||||
samWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(h,false,f);
|
||||
referenceSequence = null;
|
||||
reference_start = -1;
|
||||
failedPileReceiver = fr;
|
||||
}
|
||||
|
||||
public PileBuilder(String s, SAMFileHeader h, RecordReceiver fr) {
|
||||
this(new File(s),h, fr);
|
||||
}
|
||||
|
||||
public void setReferenceSequence(String seq, int start) {
|
||||
|
|
@ -84,6 +116,10 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
}
|
||||
|
||||
public void receive(Collection<SAMRecord> c) {
|
||||
|
||||
//TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup.
|
||||
processed_piles++;
|
||||
|
||||
IndexedSequence[] seqs = new IndexedSequence[c.size()];
|
||||
int i = 0;
|
||||
int startOnRef = 1000000000; // absolute start (leftmost) position of the pile of reads on the ref
|
||||
|
|
@ -101,18 +137,24 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
TreeSet< CountedObject<Indel> > all_indels = new TreeSet< CountedObject<Indel> >(
|
||||
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator()));
|
||||
|
||||
SequencePile originalAligns = new SequencePile(pileRef);
|
||||
SequencePile originalAligns = null;
|
||||
if ( mVerbosityLevel >= ALIGNMENTS ) originalAligns = new SequencePile(pileRef);
|
||||
|
||||
for ( SAMRecord r : c ) {
|
||||
originalAligns.addAlignedSequence(r.getReadString(), r.getReadNegativeStrandFlag(),
|
||||
r.getCigar(), r.getAlignmentStart() - startOnRef );
|
||||
if ( mVerbosityLevel >= ALIGNMENTS ) {
|
||||
originalAligns.addAlignedSequence(r.getReadString(), r.getReadNegativeStrandFlag(),
|
||||
r.getCigar(), r.getAlignmentStart() - startOnRef );
|
||||
}
|
||||
totalMismatches += AlignmentUtils.numMismatches(r,referenceSequence);
|
||||
AlignmentUtils.collectAndCountIndels(r,all_indels);
|
||||
}
|
||||
|
||||
System.out.println("\n#############################################################################");
|
||||
System.out.println("ORIGINAL ALIGNMENT: \n");
|
||||
originalAligns.dotprint(true);
|
||||
System.out.println("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") ;
|
||||
if ( mVerbosityLevel >= ALIGNMENTS ) {
|
||||
System.out.println("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
|
||||
System.out.println("ORIGINAL ALIGNMENT: \n");
|
||||
originalAligns.dotprint(true);
|
||||
System.out.println("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") ;
|
||||
}
|
||||
|
||||
List<MultipleAlignment> piles = doMultipleAlignment2(seqs);
|
||||
|
||||
|
|
@ -120,91 +162,244 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
// System.out.print("; diameter of PILE1: "+ diameter(alignments1));
|
||||
// System.out.println("; diameter of PILE2: "+ diameter(alignments2));
|
||||
|
||||
SymmetricMatrix d = new SymmetricMatrix(piles.size());
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
d.set(n,n,diameter(piles.get(n)));
|
||||
for ( int m = n+1 ; m < piles.size() ; m++ ) {
|
||||
d.set(n,m,distance(piles.get(n), piles.get(m)));
|
||||
SymmetricMatrix d = new SymmetricMatrix(piles.size());
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
d.set(n,n,diameter(piles.get(n)));
|
||||
for ( int m = n+1 ; m < piles.size() ; m++ ) {
|
||||
d.set(n,m,distance(piles.get(n), piles.get(m)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int new_mismatches = 0 ; // number of mismatches after re-alignment:
|
||||
TreeSet< CountedObject<Indel> > new_indels = new TreeSet< CountedObject<Indel> >(
|
||||
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator())
|
||||
); // new indels after realignment
|
||||
int shifted_reads = 0;
|
||||
int smashed_reads = 0;
|
||||
int new_mismatches = 0 ; // number of mismatches after re-alignment:
|
||||
TreeSet< CountedObject<Indel> > new_indels = new TreeSet< CountedObject<Indel> >(
|
||||
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator())
|
||||
); // new indels after realignment
|
||||
int shifted_reads = 0;
|
||||
int smashed_reads = 0;
|
||||
|
||||
List<SAMRecord> as_list = (List<SAMRecord>)c; // ugly hack; need this to access records by ids
|
||||
List<SAMRecord> as_list = (List<SAMRecord>)c; // ugly hack; need this to access records by ids
|
||||
|
||||
System.out.println(d.format("%8.4g"));
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
if ( mVerbosityLevel >= PILESUMMARY ) System.out.println(d.format("%8.4g"));
|
||||
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
// SWPairwiseAlignment consToRef = new SWPairwiseAlignment(pileRef,piles.get(n).getConsensus(),2.0,-10.0,-2.0,-1.0);
|
||||
SWPairwiseAlignment consToRef = new SWPairwiseAlignment(pileRef,piles.get(n).getConsensus(),3.0,-1.0,-4.0,-0.5);
|
||||
|
||||
if ( mVerbosityLevel >= ALIGNMENTS ) {
|
||||
|
||||
System.out.println("PILE " + n + " to REF ("+ (consToRef.getCigar().numCigarElements()-1)/2 +" indels):");
|
||||
System.out.println(consToRef.toString());
|
||||
System.out.println("PILE " + n +" (READS):\n" +piles.get(n).toString(true,true));
|
||||
}
|
||||
// SequencePile pileAligns = new SequencePile(pileRef);
|
||||
|
||||
MultipleAlignment ma = piles.get(n);
|
||||
for ( Integer id : ma ) {
|
||||
SAMRecord r = as_list.get(id);
|
||||
int cons_offset = ma.getOffsetWrtConsensus(id); // offset of the read 'id' wrt multiple alignment's full consensus seq
|
||||
|
||||
/*
|
||||
System.out.println("id=" + id +": offset on consensus="+cons_offset+
|
||||
"; consensus wrt ref chunk="+consToRef.getAlignmentStart2wrt1()+"; chunk start="+startOnRef);
|
||||
*/
|
||||
|
||||
int ref_offset = cons_offset + startOnRef + consToRef.getAlignmentStart2wrt1()+indelCorrection(cons_offset,consToRef.getCigar());
|
||||
if ( ref_offset != r.getAlignmentStart()) shifted_reads++;
|
||||
Cigar cig = buildCigar(cons_offset, r.getReadLength(), consToRef.getCigar());
|
||||
/*
|
||||
if ( id == 9 ) {
|
||||
System.out.println("ref_offset="+ref_offset+"; orig_ref_off="+r.getAlignmentStart()+"; "+
|
||||
AlignmentUtils.toString(cig));
|
||||
}
|
||||
|
||||
System.out.println("adding "+id+" at "+ (ref_offset - refStarttemp));
|
||||
pileAligns.addAlignedSequence(r.getReadString(), r.getReadNegativeStrandFlag(), cig, ref_offset - refStarttemp);
|
||||
*/
|
||||
if ( cig.numCigarElements() != r.getCigar().numCigarElements() ) smashed_reads++;
|
||||
|
||||
if ( ref_offset != r.getAlignmentStart() || cig.numCigarElements() != r.getCigar().numCigarElements() ) total_alignments_modified++;
|
||||
|
||||
SAMRecord rtest = new SAMRecord(r.getHeader());
|
||||
rtest.setAlignmentStart(ref_offset);
|
||||
rtest.setReadString(r.getReadString());
|
||||
rtest.setReadUmappedFlag(r.getReadUnmappedFlag());
|
||||
rtest.setCigar(cig);
|
||||
AlignmentUtils.collectAndCountIndels(rtest,new_indels);
|
||||
new_mismatches += AlignmentUtils.numMismatches(rtest,referenceSequence);
|
||||
}
|
||||
// pileAligns.colorprint(true);
|
||||
}
|
||||
|
||||
boolean pile_improved = false;
|
||||
boolean pile_unmodified = false;
|
||||
boolean pile_failed = false;
|
||||
|
||||
double mmChangePct = Math.abs((new_mismatches - totalMismatches)*100.0/totalMismatches);
|
||||
|
||||
if ( shifted_reads == 0 && smashed_reads == 0 ) pile_unmodified = true;
|
||||
else {
|
||||
if ( new_mismatches < totalMismatches ||
|
||||
mmChangePct < 10.0 && ( new_indels.size() < all_indels.size() )
|
||||
) pile_improved = true;
|
||||
else pile_failed = true;
|
||||
}
|
||||
|
||||
if ( pile_improved ) {
|
||||
total_mismatches_count_in_improved +=totalMismatches;
|
||||
total_improved_mismatches_count += new_mismatches;
|
||||
total_reads_in_improved += c.size() ;
|
||||
}
|
||||
|
||||
if ( pile_failed ) {
|
||||
total_mismatches_count_in_failed += totalMismatches;
|
||||
total_reads_in_failed += c.size();
|
||||
}
|
||||
int discovered_indels = 0;
|
||||
int discovered_support = 0;
|
||||
int existing_indels = 0;
|
||||
int existing_support = 0;
|
||||
int existing_support_new = 0;
|
||||
int discarded_indels = 0;
|
||||
for ( CountedObject<Indel> ind : new_indels ) {
|
||||
//System.out.print("new indel: "+ind.getObject().getStart()+"+"+ind.getObject().getStop());
|
||||
if ( ! all_indels.contains(ind) ) {
|
||||
//System.out.println(" (DISCOVERED)");
|
||||
discovered_indels++;
|
||||
discovered_support += ind.getCount();
|
||||
if ( pile_improved ) {
|
||||
indels_added++;
|
||||
indels_added_cnt += ind.getCount();
|
||||
}
|
||||
} else {
|
||||
//System.out.println(" (EXISTING)");
|
||||
existing_indels++;
|
||||
existing_support_new += ind.getCount();
|
||||
if ( pile_improved && ( ind.getCount() > all_indels.floor(ind).getCount() ) ) {
|
||||
if ( ! ind.equals(all_indels.floor(ind))) System.out.println("ERROR MATCHING INDELS!!!") ;
|
||||
indels_improved++;
|
||||
indel_improvement_cnt += ( ind.getCount() - all_indels.floor(ind).getCount() );
|
||||
}
|
||||
}
|
||||
}
|
||||
for ( CountedObject<Indel> ind : all_indels ) {
|
||||
//System.out.print("old indel: "+ind.getObject().getStart()+"+"+ind.getObject().getStop());
|
||||
if ( ! new_indels.contains(ind )) {
|
||||
//System.out.println(" (DISCARDED)");
|
||||
discarded_indels++;
|
||||
if ( pile_improved ) indels_discarded++;
|
||||
} else {
|
||||
//System.out.println(" (KEPT)");
|
||||
existing_support += ind.getCount();
|
||||
}
|
||||
}
|
||||
|
||||
if ( pile_improved ) improved_piles++;
|
||||
if ( pile_unmodified ) unmodified_piles++;
|
||||
if ( pile_failed ) failed_piles++;
|
||||
|
||||
if ( mVerbosityLevel >= PILESUMMARY ) {
|
||||
System.out.print("TOTAL MISMATCHES: "+totalMismatches +" --> "+new_mismatches);
|
||||
if ( totalMismatches > new_mismatches ) System.out.print("(-");
|
||||
else System.out.print("(+");
|
||||
System.out.printf("%.2f%%)%n",mmChangePct);
|
||||
|
||||
System.out.println("CONFIRMED INDELS: "+existing_indels);
|
||||
System.out.print("CONFIRMED INDEL SUPPORT: "+existing_support + " --> " + existing_support_new );
|
||||
if ( existing_support > existing_support_new ) System.out.print("(-");
|
||||
else System.out.print("(+");
|
||||
System.out.printf("%.2f%%)%n",Math.abs((existing_support- existing_support_new)*100.0/existing_support));
|
||||
System.out.println("DROPPED INDELS: " + discarded_indels);
|
||||
System.out.println("DISCOVERED INDELS: " + discovered_indels) ;
|
||||
System.out.println("DISCOVERED INDELS SUPPORT: "+discovered_support);
|
||||
System.out.println("ALIGNMENTS SHIFTED: "+shifted_reads);
|
||||
System.out.println("ALIGNMENTS WITH GAPS CHANGED: "+smashed_reads);
|
||||
|
||||
if ( pile_improved ) System.out.println("OUTCOME: IMPROVED");
|
||||
if ( pile_unmodified ) System.out.println("OUTCOME: UNCHANGED");
|
||||
if ( pile_failed ) System.out.println("OUTCOME: FAILED");
|
||||
|
||||
System.out.println("\n#############################################################################\n");
|
||||
}
|
||||
// finally, writing stuff:
|
||||
for ( int n = 0 ; n < piles.size() ; n++ ) {
|
||||
|
||||
SWPairwiseAlignment consToRef = new SWPairwiseAlignment(pileRef,piles.get(n).getConsensus(),3.0,-1.0,-4.0,-0.5);
|
||||
|
||||
System.out.println("PILE " + n + " to REF ("+ (consToRef.getCigar().numCigarElements()-1)/2 +" indels):");
|
||||
System.out.println(consToRef.toString());
|
||||
System.out.println("PILE " + n +" (READS):\n" +piles.get(n).toString());
|
||||
|
||||
MultipleAlignment ma = piles.get(n);
|
||||
for ( Integer id : ma ) {
|
||||
|
||||
Iterator<Integer> id_iter = ma.sequenceIdByOffsetIterator();
|
||||
while ( id_iter.hasNext() ) {
|
||||
|
||||
int id = id_iter.next();
|
||||
|
||||
SAMRecord r = as_list.get(id);
|
||||
if ( pile_failed || pile_unmodified ) {
|
||||
failedPileReceiver.receive(r); // nothing to do, send failed piles directly for writing
|
||||
continue;
|
||||
}
|
||||
// we improved stuff!! let's reset the alignment parameters!
|
||||
|
||||
int cons_offset = ma.getOffsetWrtConsensus(id); // offset of the read 'id' wrt multiple alignment's full consensus seq
|
||||
int ref_offset = cons_offset + startOnRef + consToRef.getAlignmentStart2wrt1();
|
||||
if ( ref_offset != r.getAlignmentStart()) shifted_reads++;
|
||||
|
||||
// offset of the realigned read r on the reference
|
||||
int ref_offset = cons_offset + startOnRef + consToRef.getAlignmentStart2wrt1()+indelCorrection(cons_offset,consToRef.getCigar());
|
||||
|
||||
r.setAlignmentStart(ref_offset);
|
||||
|
||||
Cigar cig = buildCigar(cons_offset, r.getReadLength(), consToRef.getCigar());
|
||||
if ( cig.numCigarElements() != r.getCigar().numCigarElements() ) smashed_reads++;
|
||||
SAMRecord rtest = new SAMRecord(r.getHeader());
|
||||
rtest.setAlignmentStart(ref_offset);
|
||||
rtest.setReadString(r.getReadString());
|
||||
rtest.setReadUmappedFlag(r.getReadUnmappedFlag());
|
||||
rtest.setCigar(cig);
|
||||
AlignmentUtils.collectAndCountIndels(rtest,new_indels);
|
||||
new_mismatches += AlignmentUtils.numMismatches(rtest,referenceSequence);
|
||||
}
|
||||
|
||||
}
|
||||
r.setCigar(cig);
|
||||
|
||||
r.setAttribute("NM",new Integer(AlignmentUtils.numMismatches(r,referenceSequence)));
|
||||
|
||||
if ( r.getAlignmentStart() == 713655 ) {
|
||||
System.out.println("!!!----> "+r.format());
|
||||
System.out.println("!!!----> "+AlignmentUtils.toString(cig) +" --- " +AlignmentUtils.toString(r.getCigar()));
|
||||
}
|
||||
// System.out.println("writing " + id);
|
||||
samWriter.addAlignment(r);
|
||||
|
||||
int discovered_indels = 0;
|
||||
int discovered_support = 0;
|
||||
int existing_indels = 0;
|
||||
int existing_support = 0;
|
||||
int existing_support_new = 0;
|
||||
int discarded_indels = 0;
|
||||
for ( CountedObject<Indel> ind : new_indels ) {
|
||||
if ( ! all_indels.contains(ind) ) {
|
||||
discovered_indels++;
|
||||
discovered_support += ind.getCount();
|
||||
} else {
|
||||
existing_indels++;
|
||||
existing_support_new += ind.getCount();
|
||||
}
|
||||
}
|
||||
for ( CountedObject<Indel> ind : all_indels ) {
|
||||
if ( ! new_indels.contains(ind )) {
|
||||
discarded_indels++;
|
||||
} else {
|
||||
existing_support += ind.getCount();
|
||||
}
|
||||
}
|
||||
|
||||
System.out.print("TOTAL MISMATCHES: "+totalMismatches +" --> "+new_mismatches);
|
||||
if ( totalMismatches > new_mismatches ) System.out.print("(-");
|
||||
else System.out.print("(+");
|
||||
System.out.println(Math.abs((new_mismatches - totalMismatches)*100.0/totalMismatches)+"%)");
|
||||
|
||||
System.out.println("CONFIRMED INDELS: "+existing_indels);
|
||||
System.out.print("CONFIRMED INDEL SUPPORT: "+existing_support + " --> " + existing_support_new );
|
||||
if ( existing_support > existing_support_new ) System.out.print("(-");
|
||||
else System.out.print("(+");
|
||||
System.out.println(Math.abs((existing_support- existing_support_new)*100.0/existing_support)+"%)");
|
||||
System.out.println("DROPPED INDELS: " + discarded_indels);
|
||||
System.out.println("DISCOVERED INDELS: " + discovered_indels) ;
|
||||
System.out.println("DISCOVERED INDELS SUPPORT: "+discovered_support);
|
||||
System.out.println("ALIGNMENTS SHIFTED: "+shifted_reads);
|
||||
System.out.println("ALIGNMENTS WITH GAPS CHANGED: "+smashed_reads);
|
||||
}
|
||||
|
||||
public void close() { samWriter.close(); }
|
||||
|
||||
public double pct (int i, int t) {
|
||||
return ((double)i*100.0/((double)t));
|
||||
}
|
||||
|
||||
public void printStats() {
|
||||
System.out.println("\n---------------------------------------------------------------------------------");
|
||||
System.out.println("Piles processed: "+ processed_piles);
|
||||
System.out.printf("Piles improved: %d (%.2f%%)%n", improved_piles,pct(improved_piles,processed_piles));
|
||||
System.out.printf("Piles confirmed (unchanged): %d (%.2f%%)%n", unmodified_piles,pct(unmodified_piles,processed_piles));
|
||||
System.out.printf("Piles failed: %d (%.2f%%)%n", failed_piles,pct(failed_piles,processed_piles));
|
||||
System.out.println("In improved piles:");
|
||||
System.out.printf(" Total reads: %d (%.1f per pile) with %.2f mm/read originally%n", total_reads_in_improved,
|
||||
(double)total_reads_in_improved/(double)improved_piles,(double) total_mismatches_count_in_improved /(double)total_reads_in_improved);
|
||||
System.out.printf(" Overall mismatch count: %d --> %d (%.2f%%)%n", total_mismatches_count_in_improved,total_improved_mismatches_count,
|
||||
pct(total_improved_mismatches_count- total_mismatches_count_in_improved, total_mismatches_count_in_improved));
|
||||
System.out.printf(" Mismatch improvement: suppressed %.2f mm/read%n",
|
||||
(double)(total_mismatches_count_in_improved -total_improved_mismatches_count)/(double)total_reads_in_improved );
|
||||
System.out.printf(" Alignments modified: %d (%.2f%% of total or %.2f per pile)%n",total_alignments_modified,
|
||||
pct(total_alignments_modified,total_reads_in_improved),(double)total_alignments_modified/(double)improved_piles);
|
||||
System.out.printf(" Improved indels: %d (%.2f per pile) with %.3f additional reads per indel%n",
|
||||
indels_improved,(double)indels_improved/(double)improved_piles,(double)indel_improvement_cnt/(double)indels_improved);
|
||||
System.out.printf(" New indels: %d (%.2f per pile) with %.3f reads per indel%n",
|
||||
indels_added,(double)indels_added/(double)improved_piles,(double)indels_added_cnt/(double)indels_added);
|
||||
System.out.printf(" Discarded indels: %d (%.2f per pile)%n",
|
||||
indels_discarded,(double)indels_discarded/(double)improved_piles);
|
||||
System.out.println("In failed piles:");
|
||||
System.out.printf(" Total reads: %d (%.1f per pile) with %.2f mm/read originally%n", total_reads_in_failed,
|
||||
(double)total_reads_in_failed/(double)failed_piles,(double) total_mismatches_count_in_failed /(double)total_reads_in_failed);
|
||||
System.out.println("---------------------------------------------------------------------------------\n");
|
||||
|
||||
}
|
||||
|
||||
public void setVerbosity(int v) {
|
||||
mVerbosityLevel = v;
|
||||
}
|
||||
/** Assuming that a read of length l has a gapless, fully consumed align starting at s (ZERO-based) to some sequence X,
|
||||
* and that sequence's alignment to some reference Y is described by baseCigar, builds a cigar for the direct
|
||||
* alignment of the read to Y (i.e. if the alignment of X to Y contains indel(s) and the read spans them, the
|
||||
|
|
@ -224,7 +419,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
int i = 0;
|
||||
while ( refpos <= s ) {
|
||||
celem = baseCigar.getCigarElement(i);
|
||||
refpos+=celem.getLength();
|
||||
if ( celem.getOperator() != CigarOperator.D ) refpos+=celem.getLength();
|
||||
i++;
|
||||
}
|
||||
// we now sit on cigar element that contains start s, and refpos points to the end of that element; i points to next element
|
||||
|
|
@ -241,6 +436,24 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
return new Cigar(lce);
|
||||
}
|
||||
|
||||
private int indelCorrection(int offset, Cigar cig) {
|
||||
int correction = 0;
|
||||
for ( int i = 0 ; i < cig.numCigarElements() && offset > 0 ; i++ ) {
|
||||
CigarElement ce = cig.getCigarElement(i);
|
||||
switch ( ce.getOperator() ) {
|
||||
case M: offset -= ce.getLength() ; break;
|
||||
case I:
|
||||
if ( offset >= ce.getLength() ) correction-= ce.getLength();
|
||||
else correction -= offset;
|
||||
offset -= ce.getLength();
|
||||
break;
|
||||
case D: correction+=ce.getLength();
|
||||
break;
|
||||
}
|
||||
}
|
||||
return correction;
|
||||
}
|
||||
|
||||
public void initPairwiseAlignments( IndexedSequence [] seqs ) {
|
||||
distances = new SymmetricMatrix( seqs.length );
|
||||
alignments = new Matrix<PairwiseAlignment>( seqs.length );
|
||||
|
|
@ -517,7 +730,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
*/
|
||||
public double diameter(MultipleAlignment a) {
|
||||
double dmaxmin = 0.0;
|
||||
System.out.print("\n[");
|
||||
if ( mVerbosityLevel >= PILESUMMARY ) System.out.print("\nclosest neighbor for each seq: [");
|
||||
Iterator<Integer> ids1 = a.sequenceIdByOffsetIterator();
|
||||
while ( ids1.hasNext() ) {
|
||||
Integer id1 = ids1.next();
|
||||
|
|
@ -528,10 +741,10 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
d = Math.min(d,dpair);
|
||||
}
|
||||
// d = distance from id1 to its closest neighbor within the pile
|
||||
if ( d < 1e99 ) System.out.printf("%8.4g",d);
|
||||
if ( d < 1e99 && mVerbosityLevel >= PILESUMMARY ) System.out.printf("%8.4g",d);
|
||||
if ( d < 1e99 && d > dmaxmin ) dmaxmin = d;
|
||||
}
|
||||
System.out.println(" ]");
|
||||
if ( mVerbosityLevel >= PILESUMMARY ) System.out.println(" ]");
|
||||
// dmaxmin = the largest distance from a sequence in this pile to its closest neighbor
|
||||
// System.out.println();
|
||||
return dmaxmin;
|
||||
|
|
@ -545,7 +758,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
// IndexedSequence [] seqs = testSet3(K); // initialize test set data
|
||||
IndexedSequence [] seqs = testSet4(K); // initialize test set data
|
||||
|
||||
PileBuilder pb = new PileBuilder();
|
||||
PileBuilder pb = new PileBuilder("test1.bam",null,new DiscardingReceiver());
|
||||
|
||||
//pb.doMultipleAlignment(seqs);
|
||||
pb.doMultipleAlignment2(seqs);
|
||||
|
|
|
|||
|
|
@ -91,6 +91,8 @@ public class SWPairwiseAlignment {
|
|||
|
||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
||||
int maxscore = 0;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
// look for largest score. we use >= combined with the traversal direction
|
||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
||||
|
|
@ -102,6 +104,7 @@ public class SWPairwiseAlignment {
|
|||
p.first = n;
|
||||
p.second = j ;
|
||||
maxscore = sw[n][j];
|
||||
segment_length = m - j; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -114,7 +117,6 @@ public class SWPairwiseAlignment {
|
|||
// to that sequence
|
||||
|
||||
int state = MSTATE;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
int [] scores = new int[3];
|
||||
|
||||
|
|
@ -160,12 +162,12 @@ public class SWPairwiseAlignment {
|
|||
case DSTATE: o = CigarOperator.D; break;
|
||||
}
|
||||
|
||||
alignment_offset = p.first - p.second;
|
||||
segment_length+=p.second;
|
||||
CigarElement e = new CigarElement(segment_length,o);
|
||||
lce.add(e);
|
||||
Collections.reverse(lce);
|
||||
alignmentCigar = new Cigar(lce);
|
||||
alignment_offset = p.first - p.second;
|
||||
}
|
||||
|
||||
/** Allows for separate gap opening end extension penalties, no direct backtracking.
|
||||
|
|
@ -198,6 +200,8 @@ public class SWPairwiseAlignment {
|
|||
|
||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
||||
double maxscore = 0.0;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
// look for largest score. we use >= combined with the traversal direction
|
||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
||||
|
|
@ -209,6 +213,7 @@ public class SWPairwiseAlignment {
|
|||
p.first = n;
|
||||
p.second = j ;
|
||||
maxscore = sw[n][j];
|
||||
segment_length = m - j; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -221,7 +226,6 @@ public class SWPairwiseAlignment {
|
|||
// to that sequence
|
||||
|
||||
int state = MSTATE;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
double [] scores = new double[3];
|
||||
|
||||
|
|
@ -268,12 +272,12 @@ public class SWPairwiseAlignment {
|
|||
case ISTATE: o = CigarOperator.I; break;
|
||||
case DSTATE: o = CigarOperator.D; break;
|
||||
}
|
||||
alignment_offset = p.first - p.second;
|
||||
segment_length+=p.second;
|
||||
CigarElement e = new CigarElement(segment_length,o);
|
||||
lce.add(e);
|
||||
Collections.reverse(lce);
|
||||
alignmentCigar = new Cigar(lce);
|
||||
alignment_offset = p.first - p.second ;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -342,6 +346,8 @@ public void align3(String a, String b) {
|
|||
|
||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
||||
double maxscore = 0.0;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
// look for largest score. we use >= combined with the traversal direction
|
||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
||||
|
|
@ -353,6 +359,7 @@ public void align3(String a, String b) {
|
|||
p.first = n;
|
||||
p.second = j ;
|
||||
maxscore = sw[n][j];
|
||||
segment_length = m - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -365,7 +372,6 @@ public void align3(String a, String b) {
|
|||
// to that sequence
|
||||
|
||||
int state = MSTATE;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
double [] scores = new double[3];
|
||||
|
||||
|
|
@ -419,12 +425,12 @@ public void align3(String a, String b) {
|
|||
case ISTATE: o = CigarOperator.I; break;
|
||||
case DSTATE: o = CigarOperator.D; break;
|
||||
}
|
||||
alignment_offset = p.first - p.second;
|
||||
segment_length+=p.second;
|
||||
CigarElement e = new CigarElement(segment_length,o);
|
||||
lce.add(e);
|
||||
Collections.reverse(lce);
|
||||
alignmentCigar = new Cigar(lce);
|
||||
alignment_offset = p.first - p.second;
|
||||
}
|
||||
|
||||
public void align4(String a, String b) {
|
||||
|
|
@ -485,6 +491,8 @@ public void align3(String a, String b) {
|
|||
|
||||
PrimitivePair.Int p = new PrimitivePair.Int();
|
||||
double maxscore = 0.0;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
// look for largest score. we use >= combined with the traversal direction
|
||||
// to ensure that if two scores are equal, the one closer to diagonal gets picked
|
||||
for ( int i = 1 ; i < n+1 ; i++ ) {
|
||||
|
|
@ -496,6 +504,7 @@ public void align3(String a, String b) {
|
|||
p.first = n;
|
||||
p.second = j ;
|
||||
maxscore = sw[n][j];
|
||||
segment_length = m - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -508,7 +517,6 @@ public void align3(String a, String b) {
|
|||
// to that sequence
|
||||
|
||||
int state = MSTATE;
|
||||
int segment_length = 0; // length of the segment (continuous matches, insertions or deletions)
|
||||
|
||||
double [] scores = new double[3];
|
||||
|
||||
|
|
@ -558,12 +566,12 @@ public void align3(String a, String b) {
|
|||
case ISTATE: o = CigarOperator.I; break;
|
||||
case DSTATE: o = CigarOperator.D; break;
|
||||
}
|
||||
alignment_offset = p.first - p.second;
|
||||
segment_length+=p.second;
|
||||
CigarElement e = new CigarElement(segment_length,o);
|
||||
lce.add(e);
|
||||
Collections.reverse(lce);
|
||||
alignmentCigar = new Cigar(lce);
|
||||
alignment_offset = p.first - p.second;
|
||||
}
|
||||
|
||||
private int w(char x, char y) {
|
||||
|
|
@ -688,17 +696,21 @@ public void align3(String a, String b) {
|
|||
}
|
||||
// now pos1 = alignment_offset
|
||||
}
|
||||
System.out.println(AlignmentUtils.toString(getCigar()));
|
||||
System.out.println("seq1l="+s1.length()+"; seq2l=" + s2.length());
|
||||
System.out.println("offset="+alignment_offset);
|
||||
|
||||
try {
|
||||
System.out.println("pos1="+pos1+"; pos2=" + pos2);
|
||||
/* debug prints: */
|
||||
// System.out.println(AlignmentUtils.toString(getCigar()));
|
||||
// System.out.println("seq1l="+s1.length()+"; seq2l=" + s2.length());
|
||||
// System.out.println("offset="+alignment_offset);
|
||||
// System.out.println("pos1="+pos1+"; pos2=" + pos2);
|
||||
/**/
|
||||
for ( int i = 0 ; i < getCigar().numCigarElements() ; i++ ) {
|
||||
CigarElement ce = getCigar().getCigarElement(i) ;
|
||||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
for ( int k = 0 ; k < ce.getLength() ; k++ ) {
|
||||
int z = ( i == 0 ? pos2 : 0); // if we are in the first element and seq overhangs to the left,
|
||||
// start inside the first segment, at the position where actual matches begin
|
||||
// check separately for pos1 < s1.length() since seq2 is allowed to overhang beyond seq1's end
|
||||
for ( ; z < ce.getLength() && pos1 < s1.length() ; z++ ) {
|
||||
// System.out.println("pos1="+pos1+"; pos2="+pos2+"; k="+z);
|
||||
if ( Character.toUpperCase(s1.charAt(pos1)) !=
|
||||
Character.toUpperCase(s2.charAt(pos2)) ) bmm.append('*');
|
||||
else bmm.append(' ');
|
||||
|
|
@ -722,10 +734,7 @@ public void align3(String a, String b) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
} catch(Exception e) {}
|
||||
b1.append("<---");
|
||||
b2.append("<---");
|
||||
bmm.append("<---");
|
||||
|
||||
bmm.append('\n');
|
||||
b1.append(s1,pos1,s1.length());
|
||||
bmm.append(b1);
|
||||
|
|
@ -733,6 +742,7 @@ public void align3(String a, String b) {
|
|||
b2.append(s2,pos2,s2.length());
|
||||
bmm.append(b2);
|
||||
bmm.append('\n');
|
||||
|
||||
return bmm.toString();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -231,7 +231,7 @@ public class SequencePile {
|
|||
if ( refC == '+' ) {
|
||||
// count number of observations for insertion
|
||||
for ( int j = 0 ; j < col.size() ; j++ ) {
|
||||
if ( col.charAt(j) != '*' ) count++;
|
||||
if ( col.charAt(j) != '*' && col.charAt(j) != ' ') count++;
|
||||
}
|
||||
} else {
|
||||
if ( headerGrid.charAt(i) == '-' ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue