minor changes and shuffling code around; also, now when realigned piles are printed they are sorted by start position
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@125 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0ea44a5805
commit
497eea2e5c
|
|
@ -23,7 +23,7 @@ public class IndelInspector extends CommandLineProgram {
|
|||
|
||||
// Usage and parameters
|
||||
@Usage(programVersion="1.0") public String USAGE = "Investigates indels called in the alignment data\n";
|
||||
@Option(shortName="I", doc="SAM or BAM file for calling") public File INPUT_FILE;
|
||||
@Option(shortName="I", doc="SAM or BAM file for calling",optional=true) public File INPUT_FILE;
|
||||
@Option(shortName="L",doc="Genomic interval to run on, as contig[:start[-stop]]; whole genome if not specified", optional=true) public String GENOME_LOCATION;
|
||||
@Option(doc="Error counting mode: MM - count mismatches only, ERR - count errors (arachne style), MG - count mismatches and gaps as one error each") public String ERR_MODE;
|
||||
@Option(doc="Maximum number of errors allowed (see ERR_MODE)") public Integer MAX_ERRS;
|
||||
|
|
@ -36,7 +36,7 @@ public class IndelInspector extends CommandLineProgram {
|
|||
|
||||
protected int doWork() {
|
||||
|
||||
System.out.println("I am at version 0.1");
|
||||
System.out.println("I am at version 0.2");
|
||||
GenomeLoc location = null;
|
||||
if ( GENOME_LOCATION != null ) {
|
||||
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
|
||||
|
|
@ -47,7 +47,8 @@ public class IndelInspector extends CommandLineProgram {
|
|||
return 1;
|
||||
}
|
||||
|
||||
final SAMFileReader samReader = new SAMFileReader(getInputFile(INPUT_FILE,"/broad/1kG/"));
|
||||
final SAMFileReader samReader = new SAMFileReader(getInputFile(INPUT_FILE,"/broad/1KG/"));
|
||||
samReader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
||||
|
||||
setContigOrdering(samReader);
|
||||
|
||||
|
|
@ -69,6 +70,7 @@ public class IndelInspector extends CommandLineProgram {
|
|||
|
||||
for ( SAMRecord r : samReader ) {
|
||||
|
||||
if ( r.getReadUnmappedFlag() ) continue;
|
||||
if ( r.getReferenceName() != cur_contig) {
|
||||
cur_contig = r.getReferenceName();
|
||||
System.out.println("Contig "+cur_contig);
|
||||
|
|
@ -173,6 +175,7 @@ public class IndelInspector extends CommandLineProgram {
|
|||
}
|
||||
|
||||
private static int numMismatchesDirect(SAMRecord r, ReferenceSequence ref) {
|
||||
if ( r.getReadUnmappedFlag() ) return 1000000;
|
||||
int i_ref = r.getAlignmentStart()-1; // position on the ref
|
||||
int i_read = 0; // position on the read
|
||||
int mm_count = 0; // number of mismatches
|
||||
|
|
|
|||
|
|
@ -310,11 +310,6 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
// this means that the current train and pile of reads overlapping with it are fully built;
|
||||
// emit into indel receiver if the train is interesting enough, or into the nonindel receiver:
|
||||
|
||||
System.out.print(mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
|
||||
finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " +
|
||||
finalTrain.size() + " indels");
|
||||
System.out.println(formatRange(finalTrain));
|
||||
|
||||
if ( shouldAcceptForOutput(finalTrain ) ) {
|
||||
System.out.print(mLastContig+":"+ finalTrain.get(0).getObject().getStart() + "-" +
|
||||
finalTrain.get(finalTrain.size()-1).getObject().getStop() + " " +
|
||||
|
|
@ -451,7 +446,7 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
long min = 1000000000;
|
||||
long max = 0;
|
||||
|
||||
all.append("passing indels:");
|
||||
all.append("; passing indels:");
|
||||
for ( CountedObject<Indel> o : indels ) {
|
||||
if ( o.getCount() < 2 ) continue;
|
||||
all.append(" ");
|
||||
|
|
@ -459,7 +454,9 @@ public class IndelRecordPileCollector implements RecordReceiver {
|
|||
if ( o.getObject().getIndelLength() < min ) min = o.getObject().getIndelLength();
|
||||
if ( o.getObject().getIndelLength() > max ) max = o.getObject().getIndelLength();
|
||||
}
|
||||
b.append("; passing min length: ");
|
||||
if ( max == 0 ) return new String(); // no passinf indels, return empty string
|
||||
|
||||
b.append(" passing min length: ");
|
||||
b.append(min);
|
||||
b.append("; passing max length: ");
|
||||
b.append(max);
|
||||
|
|
|
|||
|
|
@ -1,18 +1,14 @@
|
|||
package org.broadinstitute.sting.indels;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class MultipleAlignment implements Iterable<Integer> {
|
||||
private static final int IMPOSSIBLE = 1000000000;
|
||||
private Map<Integer,Integer> index;
|
||||
private List<String> seqs;
|
||||
private List<Integer> ext_ids;
|
||||
private List<Integer> alignment_offsets; // offset of seqs[i] w/respect to seqs[0]
|
||||
private Map<Integer,Integer> index; // maps external id of the sequence onto its index in the pile
|
||||
private List<String> seqs; // sequences, in order they were added
|
||||
private List<Integer> ext_ids; // external ids of the sequences, in order they were added to the pile
|
||||
private List<Integer> alignment_offsets; // offset of seqs[i] w/respect to seqs[0] (i.e. in order the seqs were added)
|
||||
private int best_mm; // mismatch count
|
||||
private int next_mm; // next-best mismatch count
|
||||
|
||||
|
|
@ -29,7 +25,27 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
alignment_offsets.clear();
|
||||
ext_ids.clear();
|
||||
}
|
||||
|
||||
|
||||
public static <T extends Comparable> Integer[] SortPermutation( List<T> A )
|
||||
{
|
||||
final Object[] data = A.toArray();
|
||||
|
||||
class comparator implements Comparator<Integer>
|
||||
{
|
||||
public int compare(Integer a, Integer b)
|
||||
{
|
||||
return ((T)data[a]).compareTo(data[b]);
|
||||
}
|
||||
}
|
||||
Integer[] permutation = new Integer[A.size()];
|
||||
for (int i = 0; i < A.size(); i++)
|
||||
{
|
||||
permutation[i] = i;
|
||||
}
|
||||
Arrays.sort(permutation, new comparator());
|
||||
return permutation;
|
||||
}
|
||||
|
||||
/** Adds single sequence with id set to i. Pile must be empty, or IllegalStateException will be thrown
|
||||
*
|
||||
* @param seq sequence to add
|
||||
|
|
@ -182,11 +198,22 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public String skipN(int n) {
|
||||
StringBuilder b=new StringBuilder();
|
||||
for ( int k = 0 ; k < n ; k++ ) b.append(' ');
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public void skipN(int n, StringBuilder b) {
|
||||
for ( int k = 0 ; k < n ; k++ ) b.append(' ');
|
||||
}
|
||||
|
||||
/** Returns a (multiline) string that represents the alignment visually: the sequences are appropriately
|
||||
* shifted and ready for printout;
|
||||
*/
|
||||
public String toString() {
|
||||
public String toString(boolean inorder) {
|
||||
|
||||
StringBuilder b = new StringBuilder();
|
||||
java.util.Formatter frmt = new java.util.Formatter(b);
|
||||
|
||||
|
|
@ -196,23 +223,23 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||
if ( -alignment_offsets.get(i) > skip_first ) skip_first = -alignment_offsets.get(i);
|
||||
}
|
||||
|
||||
Integer[] perm = null;
|
||||
if ( inorder ) perm = SortPermutation(alignment_offsets);
|
||||
|
||||
frmt.format("%3d:", ext_ids.get(0));
|
||||
for ( int k = 0 ; k < skip_first ; k++ ) b.append(' ');
|
||||
b.append(seqs.get(0));
|
||||
b.append('\n');
|
||||
|
||||
for ( int i = 1 ; i < seqs.size() ; i++ ) {
|
||||
frmt.format("%3d:", ext_ids.get(i));
|
||||
int skip = alignment_offsets.get(i) + skip_first ;
|
||||
for ( int k = 0 ; k < skip ; k++ ) b.append(' ');
|
||||
b.append(seqs.get(i));
|
||||
for ( int i = 0 ; i < seqs.size() ; i++ ) {
|
||||
int index = (inorder ? perm[i] : i);
|
||||
frmt.format("%3d:", ext_ids.get(index));
|
||||
skipN(alignment_offsets.get(index)+skip_first,b);
|
||||
b.append(seqs.get(index));
|
||||
b.append('\n');
|
||||
}
|
||||
// b.append(best_mm+" mismatches, "+ next_mm + " next best, " + getOverlap() + " overlapping bases, distance=" + distance() + "\n");
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
|
||||
public String toString() { return toString(true); }
|
||||
|
||||
public int size() { return seqs.size(); }
|
||||
|
||||
/** Returns an iterator over the id's of the sequences currently stored in the pile
|
||||
|
|
@ -221,8 +248,27 @@ public class MultipleAlignment implements Iterable<Integer> {
|
|||
*/
|
||||
public Iterator<Integer> sequenceIdIterator() { return index.keySet().iterator(); }
|
||||
|
||||
public Iterator<Integer> sequenceIdByOffsetIterator() {
|
||||
final Integer[] perm = SortPermutation(alignment_offsets);
|
||||
return new Iterator<Integer>() {
|
||||
private int i = 0;
|
||||
public boolean hasNext() {
|
||||
return i < perm.length;
|
||||
}
|
||||
public Integer next() {
|
||||
return ext_ids.get(perm[i++]);
|
||||
}
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("remove not supported");
|
||||
}
|
||||
} ;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Integer> iterator() {
|
||||
return sequenceIdIterator();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.indels;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
public class PileBuilder implements RecordPileReceiver {
|
||||
|
|
@ -56,7 +57,7 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
}
|
||||
doMultipleAlignment(seqs);
|
||||
System.out.print("Distance between final piles: "+distance(alignments1, alignments2));
|
||||
System.out.println("; diameter of PILE1: "+ diameter(alignments1));
|
||||
System.out.print("; diameter of PILE1: "+ diameter(alignments1));
|
||||
System.out.println("; diameter of PILE2: "+ diameter(alignments2));
|
||||
|
||||
System.out.println("PILE 1: \n"+alignments1.toString());
|
||||
|
|
@ -209,18 +210,23 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
*/
|
||||
public double diameter(MultipleAlignment a) {
|
||||
double dmaxmin = 0.0;
|
||||
for ( Integer id1 : a ) {
|
||||
double d = 1e100;
|
||||
for ( Integer id2 : a ) {
|
||||
if ( id2 <= id1 ) continue;
|
||||
if ( distances.get(id1,id2) < d ) d = distances.get(id1,id2);
|
||||
System.out.print("\n[");
|
||||
Iterator<Integer> ids1 = a.sequenceIdByOffsetIterator();
|
||||
while ( ids1.hasNext() ) {
|
||||
Integer id1 = ids1.next();
|
||||
double d = 1e100; // will hold distance from id1 to its closest neighbor
|
||||
for ( Integer id2 : a ) {
|
||||
if ( id2 == id1 ) continue;
|
||||
double dpair = ( id1 < id2 ? distances.get(id1,id2) : distances.get(id2,id1) );
|
||||
d = Math.min(d,dpair);
|
||||
}
|
||||
// d = distance from id1 to its closest neighbor within the pile
|
||||
if ( d < 1e99 ) System.out.printf("%8.4g",d);
|
||||
if ( d < 1e99 && d > dmaxmin ) dmaxmin = d;
|
||||
}
|
||||
System.out.println(" ]");
|
||||
// dmaxmin = the largest distance from a sequence in this pile to its closest neighbor
|
||||
System.out.println();
|
||||
// System.out.println();
|
||||
return dmaxmin;
|
||||
}
|
||||
|
||||
|
|
@ -229,11 +235,18 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
int K=8;
|
||||
// IndexedSequence [] seqs = testSet1(K); // initialize test set data
|
||||
// IndexedSequence [] seqs = testSet2(K); // initialize test set data
|
||||
IndexedSequence [] seqs = testSet3(K); // initialize test set data
|
||||
|
||||
// IndexedSequence [] seqs = testSet3(K); // initialize test set data
|
||||
IndexedSequence [] seqs = testSet4(K); // initialize test set data
|
||||
|
||||
PileBuilder pb = new PileBuilder();
|
||||
|
||||
pb.doMultipleAlignment(seqs);
|
||||
System.out.print("Distance between final piles: "+pb.distance(pb.alignments1, pb.alignments2));
|
||||
System.out.print("; diameter of PILE1: "+ pb.diameter(pb.alignments1));
|
||||
System.out.println("; diameter of PILE2: "+ pb.diameter(pb.alignments2));
|
||||
|
||||
System.out.println("PILE 1: \n"+pb.alignments1.toString());
|
||||
System.out.println("PILE 2: \n"+pb.alignments2.toString());
|
||||
}
|
||||
|
||||
public void doMultipleAlignment(IndexedSequence[] seqs) {
|
||||
|
|
@ -359,4 +372,28 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
seqs[10] = new IndexedSequence("GTCTGGTGAGGGTAGGGTGCACTCTCTGCTTCATAAATGGGTCTCTTGCCGCAAAAAAATCTGTTTGCTCCTCCAG",K);
|
||||
return seqs;
|
||||
}
|
||||
|
||||
public static IndexedSequence[] testSet4(int K) {
|
||||
IndexedSequence [] seqs = new IndexedSequence[19];
|
||||
seqs[0] = new IndexedSequence("CGTGTGTGTGTGTGCAGTGCGTGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTTTGTGAGATC",K);
|
||||
seqs[1] = new IndexedSequence("ATGTGTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCAT",K);
|
||||
seqs[2] = new IndexedSequence("GTGTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGC",K);
|
||||
seqs[3] = new IndexedSequence("TGTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCA",K);
|
||||
seqs[4] = new IndexedSequence("GTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCAT",K);
|
||||
seqs[5] = new IndexedSequence("GTGTGTGTGCCGTGCTTTGTGCTGTGAGATCTGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCTGCAT",K);
|
||||
seqs[6] = new IndexedSequence("GTGTGTGCAGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGT",K);
|
||||
seqs[7] = new IndexedSequence("GTGCAGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGT",K);
|
||||
seqs[8] = new IndexedSequence("TGCAGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGTG",K);
|
||||
seqs[9] = new IndexedSequence("AGTGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGTGTGT",K);
|
||||
seqs[10] = new IndexedSequence("TGGGCATGGTGCTGTGAGATCAGCGTGTGTGTGTGCAGCGCATGGTGCTGTGTGAGATCAGCGTGTGTGTGTGCAG",K);
|
||||
seqs[11] = new IndexedSequence("GCTGTGAGATCAGCGTGTGTGTGTGAGCAGTGCATGGGGATGTGTGAGATCAGCATGTGTGTGTGTGTGCAGCGCG",K);
|
||||
seqs[12] = new IndexedSequence("GCTGTGAGATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGTGTGTGTGCAGTGCA",K);
|
||||
seqs[13] = new IndexedSequence("AGATCAGCATGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGTGTGTGTGCAGTGCATGGTGC",K);
|
||||
seqs[14] = new IndexedSequence("AGATCAGCGTGTGTGTGTGCAGCGCATGGCGCTGTGTGAGATCAGCATGTGTGTGTGTGTGCGGCGCATGGGGGTG",K);
|
||||
seqs[15] = new IndexedSequence("GATCAGCGTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGAATGTGTGTGTGTGTGCAGTGCATGGTGCT",K);
|
||||
seqs[16] = new IndexedSequence("ATCAGCATGGGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGGGTGTGTGGGGTGGGTGGTGGTG",K);
|
||||
seqs[17] = new IndexedSequence("ATCAGCATGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGTGTGTGTGCAGTGCATGGGGCTG",K);
|
||||
seqs[18] = new IndexedSequence("GTGTGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCATGTGTGTGTGTGTGCAGTGCATGGTGCTGAGTGTG",K);
|
||||
return seqs;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue