Significant performance optimization for reduced reads due to better algorithm for including reads in the variable regions. Fixed a critical bug that actually produced multiple copies of the same read in the variable regions with this optimization as well. Scala exploration script updated as well.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6005 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-06-16 12:54:59 +00:00
parent 38d7733989
commit 43fdd31e20
3 changed files with 27 additions and 16 deletions

View File

@ -8,10 +8,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.*;
/*
* Copyright (c) 2009 The Broad Institute
@ -48,7 +45,7 @@ import java.util.Set;
* on the reference genome, or is a dummy site that is only used to calculate insertion statistics
*/
final class ConsensusSite {
final Set<PileupElement> overlappingReads = new HashSet<PileupElement>();
final Collection<PileupElement> overlappingReads = new LinkedList<PileupElement>();
final int offset, position;
final BaseCounts counts = new BaseCounts();
@ -57,17 +54,21 @@ final class ConsensusSite {
public ConsensusSite(int position, int offset) {
this.position = position;
this.offset = offset;
}
public int getPosition() {
return position;
}
public Set<PileupElement> getOverlappingReads() {
public Collection<PileupElement> getOverlappingReads() {
return overlappingReads;
}
/**
* Adds a pileup element (read / offset pair) to this consensus site. Assumes
* that the same element isn't added to site more than once.
* @param elt
*/
public void addOverlappingRead(PileupElement elt) {
overlappingReads.add(elt);
counts.incr(elt.getBase());

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.reducereads;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.*;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.log4j.Logger;
@ -323,7 +325,7 @@ public class SingleSampleConsensusReadCompressor implements ConsensusReadCompres
List<SAMRecord> reads = new ArrayList<SAMRecord>();
for ( ConsensusSpan span : spans ) {
logger.info("Span is " + span);
//logger.info("Span is " + span);
if ( span.isConserved() )
reads.addAll(conservedSpanReads(sites, span));
else
@ -416,17 +418,25 @@ public class SingleSampleConsensusReadCompressor implements ConsensusReadCompres
return Collections.singletonList(consensus);
}
@Requires({"sites != null", "span.isVariable()"})
@Ensures("result != null")
private Collection<SAMRecord> variableSpanReads(List<ConsensusSite> sites, ConsensusSpan span) {
Set<SAMRecord> reads = new HashSet<SAMRecord>();
Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
Set<String> readNames = new HashSet<String>();
// todo -- this code is grossly inefficient, as it checks each variable read at each site in the span
for ( int i = 0; i < span.size(); i++ ) {
int refI = i + span.getOffsetFromStartOfSites();
ConsensusSite site = sites.get(refI);
for ( PileupElement p : site.getOverlappingReads() ) {
SAMRecord read = clipReadToSpan(p.getRead(), span);
if ( keepClippedReadInVariableSpan(p.getRead(), read) )
reads.add(read);
for ( PileupElement p : sites.get(refI).getOverlappingReads() ) {
if ( readNames.contains(p.getRead().getReadName()) ) {
;
//logger.info("Rejecting already seen read: " + p.getRead().getReadName());
} else {
readNames.add(p.getRead().getReadName());
SAMRecord read = clipReadToSpan(p.getRead(), span);
if ( keepClippedReadInVariableSpan(p.getRead(), read) )
reads.add(read);
}
}
}

View File

@ -37,7 +37,7 @@ class ReducedBAMEvaluation extends QScript {
}
trait CoFoJa extends JavaCommandLineFunction {
override def javaOpts = super.javaOpts + " -javaagent:lib/cofoja.jar"
override def javaOpts = super.javaOpts // + " -javaagent:lib/cofoja.jar"
}
def script = {