* refactored GenomeLoc to use contigIndex internally for performance and fixed several calling classes

* added basic unit test for GenomeLoc
* fixed bug when parsing genome locations like chr1:5000 the start position was being left as maxint rather than being set to the same as the stop position.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@365 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kcibul 2009-04-12 02:25:17 +00:00
parent 49fd951d8c
commit ce72932a45
4 changed files with 128 additions and 36 deletions

View File

@ -113,7 +113,7 @@ public class LocusIteratorByHanger extends LocusIterator {
}
protected void hangRead(final SAMRecord read) {
GenomeLoc readLoc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart());
GenomeLoc readLoc = new GenomeLoc(read.getReferenceIndex(), read.getAlignmentStart());
//System.out.printf("Adding read %s at %d%n", read.getReadName(), read.getAlignmentStart());
/*
@ -128,7 +128,7 @@ public class LocusIteratorByHanger extends LocusIterator {
if ( DEBUG )
logger.debug(String.format("Processing block %s len=%d%n", block, block.getLength()));
for ( int i = 0; i < block.getLength(); i++ ) {
GenomeLoc offset = new GenomeLoc(readLoc.getContig(), block.getReferenceStart() + i);
GenomeLoc offset = new GenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i);
readHanger.expandingPut(offset, read);
offsetHanger.expandingPut(offset, block.getReadStart() + i - 1);
if ( DEBUG )

View File

@ -43,8 +43,10 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
int tumorCovered;
int normalCovered;
int somaticCovered;
long start = 0;
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
if (start ==0) { start = System.currentTimeMillis(); }
List<SAMRecord> reads = context.getReads();
int tumorDepth = 0;
@ -58,10 +60,10 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
// that come from fully mapped pairs with a mappign quality threshold >= x
if (read.getNotPrimaryAlignmentFlag() ||
read.getDuplicateReadFlag() ||
read.getReadUnmappedFlag()
||
read.getMateUnmappedFlag() ||
read.getReadUnmappedFlag() ||
read.getMappingQuality() < MAPPING_QUALITY_THRESHOLD
// ||
// read.getMateUnmappedFlag() ||
) {
continue;
}
@ -90,10 +92,15 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
if (isTumorCovered && isNormalCovered) {somaticCovered++; }
totalSites++;
if (totalSites % 20000 == 0) {
out.println(String.format("%s:%d %d %d %d %d", context.getContig(), context.getPosition(), totalSites, tumorCovered, normalCovered, somaticCovered));
}
// if (totalSites % 100000 == 0) {
// long now = System.currentTimeMillis();
// out.println(String.format("%s:%d %d %d %d %d %dms", context.getContig(), context.getPosition(), totalSites, tumorCovered, normalCovered, somaticCovered, (now-start)));
// }
StringBuilder sb = new StringBuilder();
sb.append(context.getContig()).append(" ");
sb.append(context.getPosition());
out.println(sb.toString());
return 1;
}
@ -107,7 +114,7 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
@Override
public void onTraversalDone(Integer result) {
out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
// out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
}

View File

@ -30,7 +30,7 @@ import org.apache.log4j.Logger;
public class GenomeLoc implements Comparable<GenomeLoc> {
private static Logger logger = Logger.getLogger(GenomeLoc.class);
private String contig;
private Integer contigIndex;
private long start;
private long stop;
@ -41,7 +41,6 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
// --------------------------------------------------------------------------------------------------------------
//public static Map<String, Integer> refContigOrdering = null;
private static SAMSequenceDictionary contigInfo = null;
private static HashMap<String, String> interns = null;
public static boolean hasKnownContigOrdering() {
return contigInfo != null;
@ -52,6 +51,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
}
public static int getContigIndex( final String contig ) {
// if the conig isn't found, a null is returned which will NPE here (which is what we want right now)
return contigInfo.getSequenceIndex(contig);
}
@ -113,28 +113,32 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
// constructors
//
// --------------------------------------------------------------------------------------------------------------
public GenomeLoc( String contig, final long start, final long stop ) {
if ( interns != null )
contig = interns.get(contig);
public GenomeLoc( int contigIndex, final long start, final long stop ) {
this.contig = contig;
this.contigIndex = contigIndex;
this.start = start;
this.stop = stop;
}
public GenomeLoc( String contig, final long start, final long stop ) {
this(contigInfo.getSequenceIndex(contig), start, stop);
}
public GenomeLoc( final String contig, final long pos ) {
this( contig, pos, pos );
this(contig, pos, pos );
}
public GenomeLoc( final int contig, final long pos ) {
this(contig, pos, pos );
}
public GenomeLoc( final GenomeLoc toCopy ) {
this( new String(toCopy.getContig()), toCopy.getStart(), toCopy.getStop() );
this( toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
}
// TODO: why isn't this just a constructor?
public static GenomeLoc genomicLocationOf(final SAMRecord read) {
String contig = read.getReferenceName();
if (read.getReadUnmappedFlag())
contig = null;
return new GenomeLoc(contig, read.getAlignmentStart(), read.getAlignmentEnd());
return new GenomeLoc(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
}
// --------------------------------------------------------------------------------------------------------------
@ -171,6 +175,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
else if ( match2.matches() ) {
contig = match2.group(1);
start = parsePosition(match2.group(2));
stop = start;
}
else if ( match3.matches() ) {
contig = match3.group(1);
@ -304,7 +309,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
return returnTrueIfEmpty;
// skip loci before intervals begin
if ( hasKnownContigOrdering() && getContigIndex(curr.contig) < getContigIndex(locs.get(0).contig) )
if ( hasKnownContigOrdering() && curr.contigIndex < locs.get(0).contigIndex )
return false;
for ( GenomeLoc loc : locs ) {
@ -320,7 +325,8 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
//
// Accessors and setters
//
public final String getContig() { return this.contig; }
public final String getContig() { return contigInfo.getSequence(this.contigIndex).getSequenceName(); }
public final int getContigIndex() { return this.contigIndex; }
public final long getStart() { return this.start; }
public final long getStop() { return this.stop; }
public final String toString() {
@ -333,12 +339,12 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
}
public final boolean isUnmapped() { return this.contig == null; }
public final boolean isUnmapped() { return this.contigIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; }
public final boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; }
public final boolean atBeginningOfContigP() { return this.start == 1; }
public void setContig(String contig) {
this.contig = contig;
this.contigIndex = contigInfo.getSequenceIndex(contig);
}
public void setStart(long start) {
@ -351,16 +357,16 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
public final boolean isSingleBP() { return stop == start; }
public final boolean disjointP(GenomeLoc that) {
if ( compareContigs(this.contig, that.contig) != 0 ) return true; // different chromosomes
if ( this.start > that.stop ) return true; // this guy is past that
if ( that.start > this.stop ) return true; // that guy is past our start
if ( this.contigIndex != that.contigIndex ) return true; // different chromosomes
if ( this.start > that.stop ) return true; // this guy is past that
if ( that.start > this.stop ) return true; // that guy is past our start
return false;
}
public final boolean discontinuousP(GenomeLoc that) {
if ( compareContigs(this.contig, that.contig) != 0 ) return true; // different chromosomes
if ( (this.start - 1) > that.stop ) return true; // this guy is past that
if ( (that.start - 1) > this.stop ) return true; // that guy is past our start
if ( this.contigIndex != that.contigIndex ) return true; // different chromosomes
if ( (this.start - 1) > that.stop ) return true; // this guy is past that
if ( (that.start - 1) > this.stop ) return true; // that guy is past our start
return false;
}
@ -386,11 +392,11 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
}
public final boolean onSameContig(GenomeLoc that) {
return this.contig.equals(that.contig);
return (this.contigIndex == that.contigIndex);
}
public final int minus( final GenomeLoc that ) {
if ( this.getContig().equals(that.getContig()) )
if ( this.contigIndex == that.contigIndex )
return (int) (this.getStart() - that.getStart());
else
return Integer.MAX_VALUE;
@ -425,6 +431,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
//
// Comparison operations
//
// TODO: get rid of this method because it's sloooooooooooooow
public static int compareContigs( final String thisContig, final String thatContig )
{
if ( thisContig == thatContig )
@ -471,15 +478,16 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
}
}
public int compareContigs( GenomeLoc that ) {
return compareContigs( this.contig, that.contig );
public final int compareContigs( GenomeLoc that ) {
return (this.contigIndex == that.contigIndex)?0:((this.contigIndex < that.contigIndex)?-1:1);
}
public int compareTo( GenomeLoc that ) {
if ( this == that ) return 0;
final int cmpContig = compareContigs( this.getContig(), that.getContig() );
final int cmpContig = compareContigs(that);
if ( cmpContig != 0 ) return cmpContig;
if ( this.getStart() < that.getStart() ) return -1;
if ( this.getStart() > that.getStart() ) return 1;

View File

@ -0,0 +1,77 @@
// our package
package org.broadinstitute.sting.utils;
// the imports for unit testing.
import org.junit.*;
import static org.junit.Assert.*;
import org.apache.commons.cli.ParseException;
import org.broadinstitute.sting.utils.cmdLine.ArgumentParser;
import java.util.ArrayList;
import java.io.File;
/**
* Basic unit test for GenomeLoc
*/
public class GenomeLocTest {
private static FastaSequenceFile2 seq;
@BeforeClass
public static void init() {
// sequence
seq = new FastaSequenceFile2(new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
}
/**
* Tests that we got a string parameter in correctly
*/
@Test
public void testIsBetween() {
GenomeLoc.setupRefContigOrdering(seq);
GenomeLoc locMiddle = new GenomeLoc("chr1", 3, 3);
GenomeLoc locLeft = new GenomeLoc("chr1", 1, 1);
GenomeLoc locRight = new GenomeLoc("chr1", 5, 5);
Assert.assertTrue(locMiddle.isBetween(locLeft, locRight));
Assert.assertFalse(locLeft.isBetween(locMiddle, locRight));
Assert.assertFalse(locRight.isBetween(locLeft, locMiddle));
}
@Test
public void testContigIndex() {
GenomeLoc locOne = new GenomeLoc("chr1",1,1);
Assert.assertEquals(locOne.getContigIndex(), 1);
Assert.assertEquals(locOne.getContig(), "chr1");
GenomeLoc locX = new GenomeLoc("chrX",1,1);
Assert.assertEquals(locX.getContigIndex(), 23);
Assert.assertEquals(locX.getContig(), "chrX");
GenomeLoc locNumber = new GenomeLoc(1,1,1);
Assert.assertEquals(locNumber.getContigIndex(), 1);
Assert.assertEquals(locNumber.getContig(), "chr1");
Assert.assertEquals(locOne.compareTo(locNumber), 0);
}
@Test
public void testCompareTo() {
GenomeLoc twoOne = new GenomeLoc("chr2", 1);
GenomeLoc twoFive = new GenomeLoc("chr2", 5);
GenomeLoc twoOtherFive = new GenomeLoc("chr2", 5);
Assert.assertEquals(0, twoFive.compareTo(twoOtherFive));
Assert.assertEquals(-1, twoOne.compareTo(twoFive));
Assert.assertEquals(1, twoFive.compareTo(twoOne));
GenomeLoc oneOne = new GenomeLoc("chr1", 5);
Assert.assertEquals(-1, oneOne.compareTo(twoOne));
Assert.assertEquals(1, twoOne.compareTo(oneOne));
}
}