Significant performance optimization for reduced reads, thanks to a better algorithm for including reads in the variable regions. This change also fixes a critical bug that produced multiple copies of the same read in the variable regions. The Scala exploration script has been updated as well.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6005 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
38d7733989
commit
43fdd31e20
|
|
@ -8,10 +8,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.*;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -48,7 +45,7 @@ import java.util.Set;
|
|||
* on the reference genome, or is a dummy site that is only used to calculate insertion statistics
|
||||
*/
|
||||
final class ConsensusSite {
|
||||
final Set<PileupElement> overlappingReads = new HashSet<PileupElement>();
|
||||
final Collection<PileupElement> overlappingReads = new LinkedList<PileupElement>();
|
||||
final int offset, position;
|
||||
final BaseCounts counts = new BaseCounts();
|
||||
|
||||
|
|
@ -57,17 +54,21 @@ final class ConsensusSite {
|
|||
public ConsensusSite(int position, int offset) {
|
||||
this.position = position;
|
||||
this.offset = offset;
|
||||
|
||||
}
|
||||
|
||||
public int getPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public Set<PileupElement> getOverlappingReads() {
|
||||
public Collection<PileupElement> getOverlappingReads() {
|
||||
return overlappingReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a pileup element (read / offset pair) to this consensus site. Assumes
|
||||
* that the same element isn't added to site more than once.
|
||||
* @param elt
|
||||
*/
|
||||
public void addOverlappingRead(PileupElement elt) {
|
||||
overlappingReads.add(elt);
|
||||
counts.incr(elt.getBase());
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.reducereads;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.*;
|
||||
import org.apache.commons.math.stat.descriptive.summary.Sum;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -323,7 +325,7 @@ public class SingleSampleConsensusReadCompressor implements ConsensusReadCompres
|
|||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
|
||||
for ( ConsensusSpan span : spans ) {
|
||||
logger.info("Span is " + span);
|
||||
//logger.info("Span is " + span);
|
||||
if ( span.isConserved() )
|
||||
reads.addAll(conservedSpanReads(sites, span));
|
||||
else
|
||||
|
|
@ -416,17 +418,25 @@ public class SingleSampleConsensusReadCompressor implements ConsensusReadCompres
|
|||
return Collections.singletonList(consensus);
|
||||
}
|
||||
|
||||
@Requires({"sites != null", "span.isVariable()"})
|
||||
@Ensures("result != null")
|
||||
private Collection<SAMRecord> variableSpanReads(List<ConsensusSite> sites, ConsensusSpan span) {
|
||||
Set<SAMRecord> reads = new HashSet<SAMRecord>();
|
||||
Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||
Set<String> readNames = new HashSet<String>();
|
||||
|
||||
// todo -- this code is grossly inefficient, as it checks each variable read at each site in the span
|
||||
for ( int i = 0; i < span.size(); i++ ) {
|
||||
int refI = i + span.getOffsetFromStartOfSites();
|
||||
ConsensusSite site = sites.get(refI);
|
||||
for ( PileupElement p : site.getOverlappingReads() ) {
|
||||
SAMRecord read = clipReadToSpan(p.getRead(), span);
|
||||
if ( keepClippedReadInVariableSpan(p.getRead(), read) )
|
||||
reads.add(read);
|
||||
|
||||
for ( PileupElement p : sites.get(refI).getOverlappingReads() ) {
|
||||
if ( readNames.contains(p.getRead().getReadName()) ) {
|
||||
;
|
||||
//logger.info("Rejecting already seen read: " + p.getRead().getReadName());
|
||||
} else {
|
||||
readNames.add(p.getRead().getReadName());
|
||||
SAMRecord read = clipReadToSpan(p.getRead(), span);
|
||||
if ( keepClippedReadInVariableSpan(p.getRead(), read) )
|
||||
reads.add(read);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ class ReducedBAMEvaluation extends QScript {
|
|||
}
|
||||
|
||||
trait CoFoJa extends JavaCommandLineFunction {
|
||||
override def javaOpts = super.javaOpts + " -javaagent:lib/cofoja.jar"
|
||||
override def javaOpts = super.javaOpts // + " -javaagent:lib/cofoja.jar"
|
||||
}
|
||||
|
||||
def script = {
|
||||
|
|
|
|||
Loading…
Reference in New Issue