* refactored GenomeLoc to use contigIndex internally for performance and fixed several calling classes
* added basic unit test for GenomeLoc * fixed bug when parsing genome locations like chr1:5000 the start position was being left as maxint rather than being set to the same as the stop position. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@365 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
49fd951d8c
commit
ce72932a45
|
|
@ -113,7 +113,7 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
}
|
||||
|
||||
protected void hangRead(final SAMRecord read) {
|
||||
GenomeLoc readLoc = new GenomeLoc(read.getReferenceName(), read.getAlignmentStart());
|
||||
GenomeLoc readLoc = new GenomeLoc(read.getReferenceIndex(), read.getAlignmentStart());
|
||||
//System.out.printf("Adding read %s at %d%n", read.getReadName(), read.getAlignmentStart());
|
||||
|
||||
/*
|
||||
|
|
@ -128,7 +128,7 @@ public class LocusIteratorByHanger extends LocusIterator {
|
|||
if ( DEBUG )
|
||||
logger.debug(String.format("Processing block %s len=%d%n", block, block.getLength()));
|
||||
for ( int i = 0; i < block.getLength(); i++ ) {
|
||||
GenomeLoc offset = new GenomeLoc(readLoc.getContig(), block.getReferenceStart() + i);
|
||||
GenomeLoc offset = new GenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i);
|
||||
readHanger.expandingPut(offset, read);
|
||||
offsetHanger.expandingPut(offset, block.getReadStart() + i - 1);
|
||||
if ( DEBUG )
|
||||
|
|
|
|||
|
|
@ -43,8 +43,10 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
|
|||
int tumorCovered;
|
||||
int normalCovered;
|
||||
int somaticCovered;
|
||||
long start = 0;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
if (start ==0) { start = System.currentTimeMillis(); }
|
||||
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
int tumorDepth = 0;
|
||||
|
|
@ -58,10 +60,10 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
|
|||
// that come from fully mapped pairs with a mappign quality threshold >= x
|
||||
if (read.getNotPrimaryAlignmentFlag() ||
|
||||
read.getDuplicateReadFlag() ||
|
||||
read.getReadUnmappedFlag()
|
||||
||
|
||||
read.getMateUnmappedFlag() ||
|
||||
read.getReadUnmappedFlag() ||
|
||||
read.getMappingQuality() < MAPPING_QUALITY_THRESHOLD
|
||||
// ||
|
||||
// read.getMateUnmappedFlag() ||
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -90,10 +92,15 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
|
|||
if (isTumorCovered && isNormalCovered) {somaticCovered++; }
|
||||
totalSites++;
|
||||
|
||||
if (totalSites % 20000 == 0) {
|
||||
out.println(String.format("%s:%d %d %d %d %d", context.getContig(), context.getPosition(), totalSites, tumorCovered, normalCovered, somaticCovered));
|
||||
}
|
||||
// if (totalSites % 100000 == 0) {
|
||||
// long now = System.currentTimeMillis();
|
||||
// out.println(String.format("%s:%d %d %d %d %d %dms", context.getContig(), context.getPosition(), totalSites, tumorCovered, normalCovered, somaticCovered, (now-start)));
|
||||
// }
|
||||
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(context.getContig()).append(" ");
|
||||
sb.append(context.getPosition());
|
||||
out.println(sb.toString());
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -107,7 +114,7 @@ public class SomaticCoverageWalker extends LocusWalker<Integer, Integer> {
|
|||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
|
||||
// out.println(String.format("FINAL - %d %d %d %d", totalSites, tumorCovered, normalCovered, somaticCovered));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.apache.log4j.Logger;
|
|||
public class GenomeLoc implements Comparable<GenomeLoc> {
|
||||
private static Logger logger = Logger.getLogger(GenomeLoc.class);
|
||||
|
||||
private String contig;
|
||||
private Integer contigIndex;
|
||||
private long start;
|
||||
private long stop;
|
||||
|
||||
|
|
@ -41,7 +41,6 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
//public static Map<String, Integer> refContigOrdering = null;
|
||||
private static SAMSequenceDictionary contigInfo = null;
|
||||
private static HashMap<String, String> interns = null;
|
||||
|
||||
public static boolean hasKnownContigOrdering() {
|
||||
return contigInfo != null;
|
||||
|
|
@ -52,6 +51,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
}
|
||||
|
||||
public static int getContigIndex( final String contig ) {
|
||||
// if the conig isn't found, a null is returned which will NPE here (which is what we want right now)
|
||||
return contigInfo.getSequenceIndex(contig);
|
||||
}
|
||||
|
||||
|
|
@ -113,28 +113,32 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
// constructors
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
public GenomeLoc( String contig, final long start, final long stop ) {
|
||||
if ( interns != null )
|
||||
contig = interns.get(contig);
|
||||
public GenomeLoc( int contigIndex, final long start, final long stop ) {
|
||||
|
||||
this.contig = contig;
|
||||
this.contigIndex = contigIndex;
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
}
|
||||
|
||||
public GenomeLoc( String contig, final long start, final long stop ) {
|
||||
this(contigInfo.getSequenceIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
public GenomeLoc( final String contig, final long pos ) {
|
||||
this( contig, pos, pos );
|
||||
this(contig, pos, pos );
|
||||
}
|
||||
|
||||
public GenomeLoc( final int contig, final long pos ) {
|
||||
this(contig, pos, pos );
|
||||
}
|
||||
|
||||
public GenomeLoc( final GenomeLoc toCopy ) {
|
||||
this( new String(toCopy.getContig()), toCopy.getStart(), toCopy.getStop() );
|
||||
this( toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
|
||||
}
|
||||
|
||||
// TODO: why isn't this just a constructor?
|
||||
public static GenomeLoc genomicLocationOf(final SAMRecord read) {
|
||||
String contig = read.getReferenceName();
|
||||
if (read.getReadUnmappedFlag())
|
||||
contig = null;
|
||||
return new GenomeLoc(contig, read.getAlignmentStart(), read.getAlignmentEnd());
|
||||
return new GenomeLoc(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -171,6 +175,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
else if ( match2.matches() ) {
|
||||
contig = match2.group(1);
|
||||
start = parsePosition(match2.group(2));
|
||||
stop = start;
|
||||
}
|
||||
else if ( match3.matches() ) {
|
||||
contig = match3.group(1);
|
||||
|
|
@ -304,7 +309,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
return returnTrueIfEmpty;
|
||||
|
||||
// skip loci before intervals begin
|
||||
if ( hasKnownContigOrdering() && getContigIndex(curr.contig) < getContigIndex(locs.get(0).contig) )
|
||||
if ( hasKnownContigOrdering() && curr.contigIndex < locs.get(0).contigIndex )
|
||||
return false;
|
||||
|
||||
for ( GenomeLoc loc : locs ) {
|
||||
|
|
@ -320,7 +325,8 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
//
|
||||
// Accessors and setters
|
||||
//
|
||||
public final String getContig() { return this.contig; }
|
||||
public final String getContig() { return contigInfo.getSequence(this.contigIndex).getSequenceName(); }
|
||||
public final int getContigIndex() { return this.contigIndex; }
|
||||
public final long getStart() { return this.start; }
|
||||
public final long getStop() { return this.stop; }
|
||||
public final String toString() {
|
||||
|
|
@ -333,12 +339,12 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
}
|
||||
|
||||
|
||||
public final boolean isUnmapped() { return this.contig == null; }
|
||||
public final boolean isUnmapped() { return this.contigIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; }
|
||||
public final boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; }
|
||||
public final boolean atBeginningOfContigP() { return this.start == 1; }
|
||||
|
||||
public void setContig(String contig) {
|
||||
this.contig = contig;
|
||||
this.contigIndex = contigInfo.getSequenceIndex(contig);
|
||||
}
|
||||
|
||||
public void setStart(long start) {
|
||||
|
|
@ -351,16 +357,16 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
public final boolean isSingleBP() { return stop == start; }
|
||||
|
||||
public final boolean disjointP(GenomeLoc that) {
|
||||
if ( compareContigs(this.contig, that.contig) != 0 ) return true; // different chromosomes
|
||||
if ( this.start > that.stop ) return true; // this guy is past that
|
||||
if ( that.start > this.stop ) return true; // that guy is past our start
|
||||
if ( this.contigIndex != that.contigIndex ) return true; // different chromosomes
|
||||
if ( this.start > that.stop ) return true; // this guy is past that
|
||||
if ( that.start > this.stop ) return true; // that guy is past our start
|
||||
return false;
|
||||
}
|
||||
|
||||
public final boolean discontinuousP(GenomeLoc that) {
|
||||
if ( compareContigs(this.contig, that.contig) != 0 ) return true; // different chromosomes
|
||||
if ( (this.start - 1) > that.stop ) return true; // this guy is past that
|
||||
if ( (that.start - 1) > this.stop ) return true; // that guy is past our start
|
||||
if ( this.contigIndex != that.contigIndex ) return true; // different chromosomes
|
||||
if ( (this.start - 1) > that.stop ) return true; // this guy is past that
|
||||
if ( (that.start - 1) > this.stop ) return true; // that guy is past our start
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -386,11 +392,11 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
}
|
||||
|
||||
public final boolean onSameContig(GenomeLoc that) {
|
||||
return this.contig.equals(that.contig);
|
||||
return (this.contigIndex == that.contigIndex);
|
||||
}
|
||||
|
||||
public final int minus( final GenomeLoc that ) {
|
||||
if ( this.getContig().equals(that.getContig()) )
|
||||
if ( this.contigIndex == that.contigIndex )
|
||||
return (int) (this.getStart() - that.getStart());
|
||||
else
|
||||
return Integer.MAX_VALUE;
|
||||
|
|
@ -425,6 +431,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
//
|
||||
// Comparison operations
|
||||
//
|
||||
// TODO: get rid of this method because it's sloooooooooooooow
|
||||
public static int compareContigs( final String thisContig, final String thatContig )
|
||||
{
|
||||
if ( thisContig == thatContig )
|
||||
|
|
@ -471,15 +478,16 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
}
|
||||
}
|
||||
|
||||
public int compareContigs( GenomeLoc that ) {
|
||||
return compareContigs( this.contig, that.contig );
|
||||
public final int compareContigs( GenomeLoc that ) {
|
||||
return (this.contigIndex == that.contigIndex)?0:((this.contigIndex < that.contigIndex)?-1:1);
|
||||
}
|
||||
|
||||
|
||||
public int compareTo( GenomeLoc that ) {
|
||||
if ( this == that ) return 0;
|
||||
|
||||
final int cmpContig = compareContigs( this.getContig(), that.getContig() );
|
||||
final int cmpContig = compareContigs(that);
|
||||
|
||||
if ( cmpContig != 0 ) return cmpContig;
|
||||
if ( this.getStart() < that.getStart() ) return -1;
|
||||
if ( this.getStart() > that.getStart() ) return 1;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
// our package
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
import org.junit.*;
|
||||
import static org.junit.Assert.*;
|
||||
import org.apache.commons.cli.ParseException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.ArgumentParser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Basic unit test for GenomeLoc
|
||||
*/
|
||||
public class GenomeLocTest {
|
||||
private static FastaSequenceFile2 seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void init() {
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that we got a string parameter in correctly
|
||||
*/
|
||||
@Test
|
||||
public void testIsBetween() {
|
||||
GenomeLoc.setupRefContigOrdering(seq);
|
||||
GenomeLoc locMiddle = new GenomeLoc("chr1", 3, 3);
|
||||
|
||||
GenomeLoc locLeft = new GenomeLoc("chr1", 1, 1);
|
||||
GenomeLoc locRight = new GenomeLoc("chr1", 5, 5);
|
||||
|
||||
Assert.assertTrue(locMiddle.isBetween(locLeft, locRight));
|
||||
Assert.assertFalse(locLeft.isBetween(locMiddle, locRight));
|
||||
Assert.assertFalse(locRight.isBetween(locLeft, locMiddle));
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testContigIndex() {
|
||||
GenomeLoc locOne = new GenomeLoc("chr1",1,1);
|
||||
Assert.assertEquals(locOne.getContigIndex(), 1);
|
||||
Assert.assertEquals(locOne.getContig(), "chr1");
|
||||
|
||||
GenomeLoc locX = new GenomeLoc("chrX",1,1);
|
||||
Assert.assertEquals(locX.getContigIndex(), 23);
|
||||
Assert.assertEquals(locX.getContig(), "chrX");
|
||||
|
||||
GenomeLoc locNumber = new GenomeLoc(1,1,1);
|
||||
Assert.assertEquals(locNumber.getContigIndex(), 1);
|
||||
Assert.assertEquals(locNumber.getContig(), "chr1");
|
||||
Assert.assertEquals(locOne.compareTo(locNumber), 0);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompareTo() {
|
||||
GenomeLoc twoOne = new GenomeLoc("chr2", 1);
|
||||
GenomeLoc twoFive = new GenomeLoc("chr2", 5);
|
||||
GenomeLoc twoOtherFive = new GenomeLoc("chr2", 5);
|
||||
Assert.assertEquals(0, twoFive.compareTo(twoOtherFive));
|
||||
|
||||
Assert.assertEquals(-1, twoOne.compareTo(twoFive));
|
||||
Assert.assertEquals(1, twoFive.compareTo(twoOne));
|
||||
|
||||
GenomeLoc oneOne = new GenomeLoc("chr1", 5);
|
||||
Assert.assertEquals(-1, oneOne.compareTo(twoOne));
|
||||
Assert.assertEquals(1, twoOne.compareTo(oneOne));
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue