git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@114 348d0f76-0448-11de-a6fe-93d51630548a

This commit is contained in:
asivache 2009-03-20 15:54:29 +00:00
parent 322f4b944f
commit 9ec96414c7
3 changed files with 80 additions and 47 deletions

View File

@ -36,6 +36,7 @@ public class IndelInspector extends CommandLineProgram {
protected int doWork() {
System.out.println("I am at version 0.1");
GenomeLoc location = null;
if ( GENOME_LOCATION != null ) {
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
@ -59,7 +60,7 @@ public class IndelInspector extends CommandLineProgram {
IndelRecordPileCollector col = null;
try {
col = new IndelRecordPileCollector();
col = new IndelRecordPileCollector(new DiscardingReceiver(), new PileBuilder() );
} catch(Exception e) { System.err.println(e.getMessage()); }
if ( col == null ) return 1;
@ -105,8 +106,8 @@ public class IndelInspector extends CommandLineProgram {
else if ( ERR_MODE.equals("ERR")) err = numErrors(r);
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(r);
if ( err > MAX_ERRS.intValue() ) continue;
counter++;
if ( counter % 1000000 == 0 ) System.out.println(counter+" records; "+col.memStatsString());
// counter++;
// if ( counter % 1000000 == 0 ) System.out.println(counter+" records; "+col.memStatsString());
col.receive(r);
}

View File

@ -86,16 +86,16 @@ public class IndelRecordPileCollector implements RecordReceiver {
private List<Integer> mIndelLengthHistI;
private List<Integer> mIndelLengthHistD;
private RecordReceiver nonindelReceiver; // we will send there records that do not overlap with regions of interest
private RecordReceiver defaultReceiver; // we will send there records that do not overlap with regions of interest
private RecordPileReceiver indelPileReceiver; // piles over indel regions will be sent there
public String memStatsString() {
public String memStatsString() {
String s = "mRecordPile: ";
return s+mRecordPile.size() + " mAllIndels: "+mAllIndels.size() + " mLastContig=" +mLastContig + " mLastStartOnref="+mLastStartOnRef;
//+" Bndries="+mIndelRegionStart +":"+ mIndelRegionStop;
}
public IndelRecordPileCollector() throws java.io.IOException {
public IndelRecordPileCollector(RecordReceiver rr, RecordPileReceiver rp) throws java.io.IOException {
mRecordPile = new LinkedList<SAMRecord>();
mAllIndels = new TreeSet<CountedObject<Indel> >(
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator()));
@ -108,8 +108,8 @@ public class IndelRecordPileCollector implements RecordReceiver {
mIndelLengthHistI.add(0);
mIndelLengthHistD.add(0);
}
nonindelReceiver = new DiscardingReceiver();
indelPileReceiver = new DiscardingPileReceiver();
defaultReceiver = rr;
indelPileReceiver = rp;
setWaitState();
}
@ -138,7 +138,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
while ( i.hasNext() ) {
SAMRecord r = i.next();
if ( r.getAlignmentEnd() <= pos ) {
nonindelReceiver.receive(r);
defaultReceiver.receive(r);
i.remove();
} else break;
}
@ -153,7 +153,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
while ( i.hasNext() ) {
SAMRecord r = i.next();
if ( r.getAlignmentStart() >= pos ) {
nonindelReceiver.receive(r);
defaultReceiver.receive(r);
i.remove();
} else break;
}
@ -243,8 +243,8 @@ public class IndelRecordPileCollector implements RecordReceiver {
" bases between indels");
}
// no indels or avoiding indels in bad region: send all records to nonindelReceiver and clear the pile
for ( SAMRecord r : mRecordPile ) nonindelReceiver.receive(r);
// no indels or avoiding indels in bad region: send all records to defaultReceiver and clear the pile
for ( SAMRecord r : mRecordPile ) defaultReceiver.receive(r);
setWaitState();
return;
}
@ -277,7 +277,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
while ( indel != null ) {
// first, if we just started new indel train, then emit into nonindelReceiver all alignments
// first, if we just started new indel train, then emit into defaultReceiver all alignments
// that end prior to the first indel in the train:
if ( finalTrain.size() == 0 ) purgeRecordsEndingAtOrBefore(indel.getObject().getStart() - 1);
@ -315,8 +315,14 @@ public class IndelRecordPileCollector implements RecordReceiver {
finalTrain.size() + " indels");
System.out.println(formatRange(finalTrain));
if ( shouldAcceptForOutput(finalTrain ) ) indelPileReceiver.receive(finalPile);
else for ( SAMRecord r : finalPile ) nonindelReceiver.receive(r);
if ( shouldAcceptForOutput(finalTrain ) ) {
System.out.print(mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " +
finalTrain.size() + " indels; ");
System.out.print(finalPile.size() + " reads in the pile;") ;
System.out.println(formatRange(finalTrain));
indelPileReceiver.receive(finalPile);
} else for ( SAMRecord r : finalPile ) defaultReceiver.receive(r);
finalPile.clear();
finalTrain.clear();
curr_stop = -1;
@ -444,7 +450,8 @@ public class IndelRecordPileCollector implements RecordReceiver {
long min = 1000000000;
long max = 0;
all.append("passing indels:");
for ( CountedObject<Indel> o : indels ) {
if ( o.getCount() < 2 ) continue;
all.append(" ");
@ -452,9 +459,9 @@ public class IndelRecordPileCollector implements RecordReceiver {
if ( o.getObject().getIndelLength() < min ) min = o.getObject().getIndelLength();
if ( o.getObject().getIndelLength() > max ) max = o.getObject().getIndelLength();
}
b.append(" min: ");
b.append("; passing min length: ");
b.append(min);
b.append(" max: ");
b.append("; passing max length: ");
b.append(max);
b.append(all);
return b.toString();

View File

@ -1,11 +1,18 @@
package org.broadinstitute.sting.indels;
import net.sf.samtools.SAMRecord;
public class PileBuilder {
import java.util.Collection;
public class PileBuilder implements RecordPileReceiver {
private StrictlyUpperTriangularMatrix distances ;
private Matrix<PairwiseAlignment> alignments ;
private static class SelectedPair {
private static final int KmerSize = 8;
private MultipleAlignment alignments1;
private MultipleAlignment alignments2;
private static class SelectedPair {
private int i_;
private int j_;
private double d_;
@ -40,7 +47,23 @@ public class PileBuilder {
}
public PileBuilder() {}
public void receive(Collection<SAMRecord> c) {
IndexedSequence[] seqs = new IndexedSequence[c.size()];
int i = 0;
for ( SAMRecord r : c ) {
seqs[i++] = new IndexedSequence(r.getReadString(),KmerSize);
}
doMultipleAlignment(seqs);
System.out.print("Distance between final piles: "+distance(alignments1, alignments2));
System.out.println("; diameter of PILE1: "+ diameter(alignments1));
System.out.println("; diameter of PILE2: "+ diameter(alignments2));
System.out.println("PILE 1: \n"+alignments1.toString());
System.out.println("PILE 2: \n"+alignments2.toString());
}
public void initPairwiseAlignments( IndexedSequence [] seqs ) {
distances = new StrictlyUpperTriangularMatrix( seqs.length );
alignments = new Matrix<PairwiseAlignment>( seqs.length );
@ -57,7 +80,7 @@ public class PileBuilder {
* so that the alignments are pre-computed.
* @return id's of the two sequences and the distance between them in a compound object.
*/
public SelectedPair findBest() {
public SelectedPair findClosestPair() {
SelectedPair p = new SelectedPair(-1,-1,1e100);
@ -95,7 +118,7 @@ public class PileBuilder {
* @param pexclude neither of the two sequences in the returned pair can have its id listed in pexclude pair.
* @return Best pairwise alignment excluding alignments between pairs involving at least one sequence from pexclude
*/
public SelectedPair findBest(SelectedPair pexclude) {
public SelectedPair findNextClosestPairAfter(SelectedPair pexclude) {
SelectedPair p = new SelectedPair(-1,-1,1e100);
@ -117,7 +140,7 @@ public class PileBuilder {
* @return a compound SelectedPair object that contains the index of the closest sequence found (is guaranteed to
* be not in the pile), the index of the sequence in the pile it is closest to, and the actual distance between the two.
*/
public SelectedPair findClosest(MultipleAlignment a) {
public SelectedPair findClosestToPile(MultipleAlignment a) {
SelectedPair p = new SelectedPair(-1,-1,1e100);
@ -143,7 +166,7 @@ public class PileBuilder {
* @param a alignment pile to find the closest sequence for
* @return index of the farthest sequence
*/
public int findFarthest(MultipleAlignment a) {
public int findFarthestFromPile(MultipleAlignment a) {
double dmaxmin = 0;
int i_out = -1;
@ -209,17 +232,22 @@ public class PileBuilder {
IndexedSequence [] seqs = testSet3(K); // initialize test set data
PileBuilder pb = new PileBuilder();
pb.doMultipleAlignment(seqs);
}
public void doMultipleAlignment(IndexedSequence[] seqs) {
// two piles we are going to grow until all sequences are assigned to one of them.
// we intend to keep the piles disjoint, e.g. no sequence should be placed in both
MultipleAlignment pile1 = new MultipleAlignment();
MultipleAlignment pile2 = new MultipleAlignment();
pb.initPairwiseAlignments(seqs);
initPairwiseAlignments(seqs);
// all the pairwise alignments are computed and disjoint best and next-best pairs are found
System.out.println( pb.distances.format("%8.4g "));
//System.out.println( distances.format("%8.4g "));
/*
SelectedPair pworst = pb.findWorst();
@ -229,10 +257,10 @@ public class PileBuilder {
*/
// initialize piles with best and next-best pairs
SelectedPair p_best = pb.findBest();
SelectedPair p_nextbest = pb.findBest(p_best);
pile1.add( pb.alignments.get(p_best.i(), p_best.j()),p_best.i(),p_best.j());
pile2.add( pb.alignments.get(p_nextbest.i(), p_nextbest.j()),p_nextbest.i(),p_nextbest.j());
SelectedPair p_best = findClosestPair();
SelectedPair p_nextbest = findNextClosestPairAfter(p_best);
pile1.add( alignments.get(p_best.i(), p_best.j()),p_best.i(),p_best.j());
pile2.add( alignments.get(p_nextbest.i(), p_nextbest.j()),p_nextbest.i(),p_nextbest.j());
/*
System.out.println("Best pair ("+p_best.i() + "," + p_best.j()+", d="+p_best.d()+"):");
System.out.println(pile1.toString());
@ -245,42 +273,39 @@ public class PileBuilder {
// grow piles hierarchical clustering-style
while ( pile1.size() + pile2.size() < seqs.length ) {
// find candidate sequences closest to each of the two piles
p1 = pb.findClosest(pile1);
p2 = pb.findClosest(pile2);
p1 = findClosestToPile(pile1);
p2 = findClosestToPile(pile2);
int id1_cand = pile1.selectExternal(p1.i(), p1.j()); // id of the sequence closest to the pile 1
int id2_cand = pile2.selectExternal(p2.i(), p2.j()); // id of the sequence closest to the pile 2
if ( pile2.contains(id1_cand) && pile1.contains(id2_cand)) {
// pile1 and pile 2 are mutually the closest, so we need to merge them.
// if piles are mutually the closest, then p1 and p2 are the same pair (id1, id2),
// so we just merge on one of the (redundant) instances:
pile1.add(pile2, pb.alignments.get( p1.i(), p1.j()),p1.i(), p1.j());
pile1.add(pile2, alignments.get( p1.i(), p1.j()),p1.i(), p1.j());
pile2.clear(); // need to reset pile 2 to something else
int z = pb.findFarthest(pile1); // get sequence farthest from merged pile 1
int z = findFarthestFromPile(pile1); // get sequence farthest from merged pile 1
pile2.add(seqs[z].getSequence(), z); // and reinitialize pile 2 with that sequence
} else {
if ( p1.d() < p2.d() ) {
if ( pile2.contains(id1_cand) ) {
pile1.add(pile2, pb.alignments.get( p1.i(), p1.j()),p1.i(), p1.j());
pile1.add(pile2, alignments.get( p1.i(), p1.j()),p1.i(), p1.j());
pile2.clear(); // need to reset pile 2 to something else
int z = pb.findFarthest(pile1); // get sequence farthest from merged pile 1
int z = findFarthestFromPile(pile1); // get sequence farthest from merged pile 1
pile2.add(seqs[z].getSequence(), z); // and reinitialize pile 2 with that sequence
} else pile1.add( pb.alignments.get(p1.i(), p1.j()) );
} else pile1.add( alignments.get(p1.i(), p1.j()) );
} else {
if ( pile1.contains(id2_cand) ) {
pile2.add(pile1, pb.alignments.get( p2.i(), p2.j()),p2.i(), p2.j());
pile2.add(pile1, alignments.get( p2.i(), p2.j()),p2.i(), p2.j());
pile1.clear(); // need to reset pile 2 to something else
int z = pb.findFarthest(pile2); // get sequence farthest from merged pile 1
int z = findFarthestFromPile(pile2); // get sequence farthest from merged pile 1
pile1.add(seqs[z].getSequence(), z); // and reinitialize pile 2 with that sequence
} else pile2.add( pb.alignments.get(p2.i(), p2.j()) );
} else pile2.add( alignments.get(p2.i(), p2.j()) );
}
}
System.out.println("PILE 1: \n"+pile1.toString());
System.out.println("PILE 2: \n"+pile2.toString());
} // end WHILE
System.out.println("Distance between final piles: "+pb.distance(pile1, pile2));
System.out.println("Diameter of PILE1: "+ pb.diameter(pile1));
System.out.println("Diameter of PILE2: "+ pb.diameter(pile2));
alignments1 = pile1;
alignments2 = pile2;
/*
* System.out.println("Closest distance to the pile: " + best_d
+ "(adding: " + best_i + "," + best_j + "):");