New way of looking at the size of a pileup: there's a physical number of elements in the data structure and there's a representative depth of coverage (since a reduced read represents depth >= 1). The size() method has been removed because its meaning is ambiguous. Updated several annotations and the UG engine to make use of the representative depths.

This commit is contained in:
Eric Banks 2011-11-02 12:47:30 -04:00
parent 0839c75c8d
commit 54331b44e9
22 changed files with 71 additions and 60 deletions

View File

@ -138,7 +138,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return
*/
public boolean hasReads() {
return basePileup != null && basePileup.size() > 0 ;
return basePileup != null && basePileup.getNumberOfElements() > 0 ;
}
/**
@ -146,7 +146,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return
*/
public int size() {
return basePileup.size();
return basePileup.getNumberOfElements();
}
/**

View File

@ -92,7 +92,7 @@ public class AlignmentContextUtils {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
// Don't add empty pileups to the split context.
if(pileupBySample.size() == 0)
if(pileupBySample.getNumberOfElements() == 0)
continue;
if(sample != null)

View File

@ -92,7 +92,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
continue;
}
// todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size();
int refCount = indelPileup.getNumberOfElements();
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) {

View File

@ -41,7 +41,7 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
int depth = 0;
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
depth += sample.getValue().size();
depth += sample.getValue().getBasePileup().depthOfCoverage();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", depth));
return map;

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -43,7 +42,7 @@ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation
if ( context == null )
continue;
depth += context.size();
depth += context.getBasePileup().depthOfCoverage();
}
if ( depth == 0 )

View File

@ -79,7 +79,7 @@ public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
alleleCounts.put(allele.getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
int totalDepth = pileup.size();
int totalDepth = pileup.getNumberOfElements();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
@ -119,7 +119,7 @@ public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
if ( pileup == null )
return null;
int totalDepth = pileup.size();
int totalDepth = pileup.getNumberOfElements();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

View File

@ -43,7 +43,7 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
if (pileup != null) {
deletions += pileup.getNumberOfDeletions();
depth += pileup.size();
depth += pileup.getNumberOfElements();
}
}
Map<String, Object> map = new HashMap<String, Object>();

View File

@ -278,10 +278,10 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
if ( elt.isReducedRead() ) {
// reduced read representation
byte qual = elt.getReducedQual();
byte qual = elt.getQual();
if ( BaseUtils.isRegularBase( elt.getBase() )) {
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
return elt.getReducedCount(); // we added nObs bases here
add(obsBase, qual, (byte)0, (byte)0, elt.getRepresentativeCount()); // fast calculation of n identical likelihoods
return elt.getRepresentativeCount(); // we added nObs bases here
} else // odd bases or deletions => don't use them
return 0;
} else {

View File

@ -569,7 +569,7 @@ public class UnifiedGenotyperEngine {
ReadBackedPileup pileup = rawContext.getBasePileup().getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// don't call when there is no coverage
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
return null;
// stratify the AlignmentContext and cut by sample
@ -586,7 +586,7 @@ public class UnifiedGenotyperEngine {
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// don't call when there is no coverage
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
return null;
// stratify the AlignmentContext and cut by sample
@ -602,7 +602,7 @@ public class UnifiedGenotyperEngine {
for( final PileupElement p : rawContext.getBasePileup() ) {
if( p.isDeletion() ) { numDeletions++; }
}
if( ((double) numDeletions) / ((double) rawContext.getBasePileup().size()) > UAC.MAX_DELETION_FRACTION ) {
if( ((double) numDeletions) / ((double) rawContext.getBasePileup().getNumberOfElements()) > UAC.MAX_DELETION_FRACTION ) {
return null;
}
}
@ -649,9 +649,9 @@ public class UnifiedGenotyperEngine {
if (isCovered) {
AlignmentContext context = contexts.get(sample);
if (context.hasBasePileup())
depth = context.getBasePileup().size();
depth = context.getBasePileup().depthOfCoverage();
else if (context.hasExtendedEventPileup())
depth = context.getExtendedEventPileup().size();
depth = context.getExtendedEventPileup().depthOfCoverage();
}
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);

View File

@ -32,17 +32,11 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
@ -376,8 +370,8 @@ public class PairHMMIndelErrorModel {
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
int numHaplotypes = haplotypeMap.size();
final double readLikelihoods[][] = new double[pileup.size()][numHaplotypes];
final int readCounts[] = new int[pileup.size()];
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][numHaplotypes];
final int readCounts[] = new int[pileup.getNumberOfElements()];
int readIdx=0;
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
@ -403,8 +397,7 @@ public class PairHMMIndelErrorModel {
for (PileupElement p: pileup) {
// > 1 when the read is a consensus read representing multiple independent observations
final boolean isReduced = p.isReducedRead();
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
readCounts[readIdx] = p.getRepresentativeCount();
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
if (indelLikelihoodMap.containsKey(p)) {
@ -607,7 +600,7 @@ public class PairHMMIndelErrorModel {
if (DEBUG) {
System.out.println("\nLikelihood summary");
for (readIdx=0; readIdx < pileup.size(); readIdx++) {
for (readIdx=0; readIdx < pileup.getNumberOfElements(); readIdx++) {
System.out.format("Read Index: %d ",readIdx);
for (int i=0; i < readLikelihoods[readIdx].length; i++)
System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);

View File

@ -228,7 +228,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
// make sure we're supposed to look for high entropy
if ( mismatchThreshold > 0.0 &&
mismatchThreshold <= 1.0 &&
pileup.size() >= minReadsAtLocus &&
pileup.getNumberOfElements() >= minReadsAtLocus &&
(double)mismatchQualities / (double)totalQualities >= mismatchThreshold )
hasPointEvent = true;
}

View File

@ -258,10 +258,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
int numReads = 0;
if (context.hasBasePileup()) {
numReads = context.getBasePileup().size();
numReads = context.getBasePileup().getNumberOfElements();
}
else if (context.hasExtendedEventPileup()) {
numReads = context.getExtendedEventPileup().size();
numReads = context.getExtendedEventPileup().getNumberOfElements();
}
PhasingStats addInPhaseStats = new PhasingStats(numReads, 1);
phaseStats.addIn(addInPhaseStats);

View File

@ -78,7 +78,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
}
}
return pileup.size();
return pileup.getNumberOfElements();
}
private static String maybeSorted( final String x, boolean sortMe )
@ -94,7 +94,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
public String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent)
{
if ( a.size() != b.size() )
if ( a.getNumberOfElements() != b.size() )
return "Sizes not equal";
GenomeLoc featureLocation = getToolkit().getGenomeLocParser().createGenomeLoc(b.getChr(),b.getStart(),b.getEnd());
if ( a.getLocation().compareTo(featureLocation) != 0 )

View File

@ -113,7 +113,7 @@ public class FragmentUtils {
}
public final static FragmentCollection<PileupElement> create(ReadBackedPileup rbp) {
return create(rbp, rbp.size(), PileupElementGetter);
return create(rbp, rbp.getNumberOfElements(), PileupElementGetter);
}
public final static FragmentCollection<SAMRecord> create(List<SAMRecord> reads) {

View File

@ -45,6 +45,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
protected final PileupElementTracker<PE> pileupElementTracker;
protected int size = 0; // cached value of the size of the pileup
protected int abstractSize = -1; // cached value of the abstract size of the pileup
protected int nDeletions = 0; // cached value of the number of deletions
protected int nMQ0Reads = 0; // cached value of the number of MQ0 reads
@ -145,8 +146,16 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
protected void calculateAbstractSize() {
abstractSize = 0;
for ( PileupElement p : pileupElementTracker ) {
abstractSize += p.getRepresentativeCount();
}
}
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP,PE> pileup) {
size += pileup.size();
size += pileup.getNumberOfElements();
abstractSize += pileup.depthOfCoverage();
nDeletions += pileup.getNumberOfDeletions();
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
}
@ -574,7 +583,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
*/
@Override
public RBP getDownsampledPileup(int desiredCoverage) {
if ( size() <= desiredCoverage )
if ( getNumberOfElements() <= desiredCoverage )
return (RBP)this;
// randomly choose numbers corresponding to positions in the reads list
@ -727,13 +736,23 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
/**
* @return the number of elements in this pileup
* @return the number of physical elements in this pileup
*/
@Override
public int size() {
public int getNumberOfElements() {
return size;
}
/**
* @return the number of abstract elements in this pileup
*/
@Override
public int depthOfCoverage() {
if ( abstractSize == -1 )
calculateAbstractSize();
return abstractSize;
}
/**
* @return true if there are 0 elements in the pileup, false otherwise
*/
@ -806,7 +825,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
*/
@Override
public List<SAMRecord> getReads() {
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
List<SAMRecord> reads = new ArrayList<SAMRecord>(getNumberOfElements());
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
return reads;
}
@ -817,7 +836,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
*/
@Override
public List<Integer> getOffsets() {
List<Integer> offsets = new ArrayList<Integer>(size());
List<Integer> offsets = new ArrayList<Integer>(getNumberOfElements());
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
return offsets;
}
@ -828,7 +847,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
*/
@Override
public byte[] getBases() {
byte[] v = new byte[size()];
byte[] v = new byte[getNumberOfElements()];
int pos = 0;
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getBase(); }
return v;
@ -840,7 +859,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
*/
@Override
public byte[] getQuals() {
byte[] v = new byte[size()];
byte[] v = new byte[getNumberOfElements()];
int pos = 0;
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getQual(); }
return v;
@ -852,7 +871,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
*/
@Override
public byte[] getMappingQuals() {
byte[] v = new byte[size()];
byte[] v = new byte[getNumberOfElements()];
int pos = 0;
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = (byte)pile.getRead().getMappingQuality(); }
return v;

View File

@ -100,13 +100,8 @@ public class PileupElement implements Comparable<PileupElement> {
return ((GATKSAMRecord)read).isReducedRead();
}
public int getReducedCount() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName());
return ((GATKSAMRecord)read).getReducedCount(offset);
public int getRepresentativeCount() {
return isReducedRead() ? ((GATKSAMRecord)read).getReducedCount(offset) : 1;
}
public byte getReducedQual() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName());
return getQual();
}
}

View File

@ -155,7 +155,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
/**
* @return the number of elements in this pileup
*/
public int size();
public int getNumberOfElements();
/**
* @return the location of this pileup

View File

@ -133,7 +133,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
*/
@Override
public byte[] getEvents() {
byte[] v = new byte[size()];
byte[] v = new byte[getNumberOfElements()];
int i = 0;
for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) {
switch ( e.getType() ) {

View File

@ -169,9 +169,14 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
public int getNumberOfMappingQualityZeroReads();
/**
* @return the number of elements in this pileup
* @return the number of physical elements in this pileup (a reduced read is counted just once)
*/
public int size();
public int getNumberOfElements();
/**
* @return the number of abstract elements in this pileup (reduced reads are expanded to count all reads that they represent)
*/
public int depthOfCoverage();
/**
* @return true if there are 0 elements in the pileup, false otherwise

View File

@ -78,7 +78,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup().getBaseFilteredPileup(10);
Assert.assertEquals(pileup.getLocation().getStart(), 5, "Extended event pileup at wrong location");
Assert.assertEquals(pileup.size(), 3, "Pileup size is incorrect");
Assert.assertEquals(pileup.getNumberOfElements(), 3, "Pileup size is incorrect");
foundExtendedEventPileup = true;
}

View File

@ -65,7 +65,7 @@ public class ReadUtilsUnitTest extends BaseTest {
Assert.assertFalse(readp.isReducedRead());
Assert.assertTrue(reducedreadp.isReducedRead());
Assert.assertEquals(reducedreadp.getReducedCount(), REDUCED_READ_COUNTS[0]);
Assert.assertEquals(reducedreadp.getReducedQual(), readp.getQual());
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
}
}

View File

@ -102,7 +102,7 @@ public class ReadBackedPileupUnitTest {
ReadBackedPileup nullRgPileup = pileup.getPileupForReadGroup(null);
List<SAMRecord> nullRgReads = nullRgPileup.getReads();
Assert.assertEquals(nullRgPileup.size(), 3, "Wrong number of reads in null read group");
Assert.assertEquals(nullRgPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group");
Assert.assertEquals(nullRgReads.get(0), read1, "Read " + read1.getReadName() + " should be in null rg but isn't");
Assert.assertEquals(nullRgReads.get(1), read2, "Read " + read2.getReadName() + " should be in null rg but isn't");
Assert.assertEquals(nullRgReads.get(2), read3, "Read " + read3.getReadName() + " should be in null rg but isn't");
@ -187,7 +187,7 @@ public class ReadBackedPileupUnitTest {
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
ReadBackedPileup sample2Pileup = pileup.getPileupForSample(sample2);
Assert.assertEquals(sample2Pileup.size(),1,"Sample 2 pileup has wrong number of elements");
Assert.assertEquals(sample2Pileup.getNumberOfElements(),1,"Sample 2 pileup has wrong number of elements");
Assert.assertEquals(sample2Pileup.getReads().get(0),read2,"Sample 2 pileup has incorrect read");
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample("missing");