Significant performance optimization for reduced reads due to better algorithm for including reads in the variable regions. Fixed a critical bug that actually produced multiple copies of the same read in the variable regions with this optimization as well. Scala exploration script updated as well.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6005 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
38d7733989
commit
43fdd31e20
|
|
@ -8,10 +8,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -48,7 +45,7 @@ import java.util.Set;
|
||||||
* on the reference genome, or is a dummy site that is only used to calculate insertion statistics
|
* on the reference genome, or is a dummy site that is only used to calculate insertion statistics
|
||||||
*/
|
*/
|
||||||
final class ConsensusSite {
|
final class ConsensusSite {
|
||||||
final Set<PileupElement> overlappingReads = new HashSet<PileupElement>();
|
final Collection<PileupElement> overlappingReads = new LinkedList<PileupElement>();
|
||||||
final int offset, position;
|
final int offset, position;
|
||||||
final BaseCounts counts = new BaseCounts();
|
final BaseCounts counts = new BaseCounts();
|
||||||
|
|
||||||
|
|
@ -57,17 +54,21 @@ final class ConsensusSite {
|
||||||
public ConsensusSite(int position, int offset) {
|
public ConsensusSite(int position, int offset) {
|
||||||
this.position = position;
|
this.position = position;
|
||||||
this.offset = offset;
|
this.offset = offset;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getPosition() {
|
public int getPosition() {
|
||||||
return position;
|
return position;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set<PileupElement> getOverlappingReads() {
|
public Collection<PileupElement> getOverlappingReads() {
|
||||||
return overlappingReads;
|
return overlappingReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a pileup element (read / offset pair) to this consensus site. Assumes
|
||||||
|
* that the same element isn't added to site more than once.
|
||||||
|
* @param elt
|
||||||
|
*/
|
||||||
public void addOverlappingRead(PileupElement elt) {
|
public void addOverlappingRead(PileupElement elt) {
|
||||||
overlappingReads.add(elt);
|
overlappingReads.add(elt);
|
||||||
counts.incr(elt.getBase());
|
counts.incr(elt.getBase());
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.reducereads;
|
package org.broadinstitute.sting.playground.gatk.walkers.reducereads;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.apache.commons.math.stat.descriptive.summary.Sum;
|
import org.apache.commons.math.stat.descriptive.summary.Sum;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -323,7 +325,7 @@ public class SingleSampleConsensusReadCompressor implements ConsensusReadCompres
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||||
|
|
||||||
for ( ConsensusSpan span : spans ) {
|
for ( ConsensusSpan span : spans ) {
|
||||||
logger.info("Span is " + span);
|
//logger.info("Span is " + span);
|
||||||
if ( span.isConserved() )
|
if ( span.isConserved() )
|
||||||
reads.addAll(conservedSpanReads(sites, span));
|
reads.addAll(conservedSpanReads(sites, span));
|
||||||
else
|
else
|
||||||
|
|
@ -416,19 +418,27 @@ public class SingleSampleConsensusReadCompressor implements ConsensusReadCompres
|
||||||
return Collections.singletonList(consensus);
|
return Collections.singletonList(consensus);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"sites != null", "span.isVariable()"})
|
||||||
|
@Ensures("result != null")
|
||||||
private Collection<SAMRecord> variableSpanReads(List<ConsensusSite> sites, ConsensusSpan span) {
|
private Collection<SAMRecord> variableSpanReads(List<ConsensusSite> sites, ConsensusSpan span) {
|
||||||
Set<SAMRecord> reads = new HashSet<SAMRecord>();
|
Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||||
|
Set<String> readNames = new HashSet<String>();
|
||||||
|
|
||||||
// todo -- this code is grossly inefficient, as it checks each variable read at each site in the span
|
|
||||||
for ( int i = 0; i < span.size(); i++ ) {
|
for ( int i = 0; i < span.size(); i++ ) {
|
||||||
int refI = i + span.getOffsetFromStartOfSites();
|
int refI = i + span.getOffsetFromStartOfSites();
|
||||||
ConsensusSite site = sites.get(refI);
|
|
||||||
for ( PileupElement p : site.getOverlappingReads() ) {
|
for ( PileupElement p : sites.get(refI).getOverlappingReads() ) {
|
||||||
|
if ( readNames.contains(p.getRead().getReadName()) ) {
|
||||||
|
;
|
||||||
|
//logger.info("Rejecting already seen read: " + p.getRead().getReadName());
|
||||||
|
} else {
|
||||||
|
readNames.add(p.getRead().getReadName());
|
||||||
SAMRecord read = clipReadToSpan(p.getRead(), span);
|
SAMRecord read = clipReadToSpan(p.getRead(), span);
|
||||||
if ( keepClippedReadInVariableSpan(p.getRead(), read) )
|
if ( keepClippedReadInVariableSpan(p.getRead(), read) )
|
||||||
reads.add(read);
|
reads.add(read);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return reads;
|
return reads;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ class ReducedBAMEvaluation extends QScript {
|
||||||
}
|
}
|
||||||
|
|
||||||
trait CoFoJa extends JavaCommandLineFunction {
|
trait CoFoJa extends JavaCommandLineFunction {
|
||||||
override def javaOpts = super.javaOpts + " -javaagent:lib/cofoja.jar"
|
override def javaOpts = super.javaOpts // + " -javaagent:lib/cofoja.jar"
|
||||||
}
|
}
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue