ReadBackedPileup in all its glory. Documented, aligned with the output of LocusIteratorByState, and caching common outputs for performance

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2165 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-11-25 20:54:44 +00:00
parent b44363d20a
commit db40e28e54
22 changed files with 385 additions and 348 deletions

View File

@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.contexts;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.*;
@ -42,8 +44,7 @@ import java.util.*;
*/
public class AlignmentContext {
protected GenomeLoc loc = null;
protected List<SAMRecord> reads = null;
protected List<Integer> offsets = null;
protected ReadBackedPileup pileup = null;
/**
* The number of bases we've skipped over in the reference since the last map invocation.
@ -65,31 +66,45 @@ public class AlignmentContext {
* @param reads
* @param offsets
*/
@Deprecated
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
//assert loc != null;
//assert loc.getContig() != null;
//assert reads != null;
//assert offsets != null;
this.loc = loc;
this.reads = reads;
this.offsets = offsets;
this(loc, reads, offsets, 0);
}
@Deprecated
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets, long skippedBases ) {
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
this.loc = loc;
this.reads = reads;
this.offsets = offsets;
this.pileup = new ReadBackedPileup(loc, reads, offsets);
this.skippedBases = skippedBases;
}
public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup ) {
this(loc, pileup, 0);
}
public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup, long skippedBases ) {
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
if ( pileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null");
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
this.loc = loc;
this.pileup = pileup;
this.skippedBases = skippedBases;
}
public ReadBackedPileup getPileup() { return pileup; }
/**
* get all of the reads within this context
*
* @return
*/
public List<SAMRecord> getReads() { return reads; }
@Deprecated
public List<SAMRecord> getReads() { return pileup.getReads(); }
/**
* Are there any reads associated with this locus?
@ -97,16 +112,15 @@ public class AlignmentContext {
* @return
*/
public boolean hasReads() {
return reads != null;
return pileup.size() > 0;
}
/**
* How many reads cover this locus?
* @return
*/
public int numReads() {
assert( reads != null );
return reads.size();
public int size() {
return pileup.size();
}
/**
@ -114,89 +128,17 @@ public class AlignmentContext {
*
* @return
*/
@Deprecated
public List<Integer> getOffsets() {
return offsets;
return pileup.getOffsets();
}
public String getContig() { return getLocation().getContig(); }
public long getPosition() { return getLocation().getStart(); }
public GenomeLoc getLocation() { return loc; }
//public void setLocation(GenomeLoc loc) {
// this.loc = loc.clone();
//}
public void downsampleToCoverage(int coverage) {
if ( numReads() <= coverage )
return;
// randomly choose numbers corresponding to positions in the reads list
Random generator = new Random();
TreeSet positions = new TreeSet();
int i = 0;
while ( i < coverage ) {
if (positions.add(new Integer(generator.nextInt(reads.size()))))
i++;
}
ArrayList<SAMRecord> downsampledReads = new ArrayList<SAMRecord>();
ArrayList<Integer> downsampledOffsets = new ArrayList<Integer>();
Iterator positionIter = positions.iterator();
Iterator<SAMRecord> readsIter = reads.iterator();
Iterator<Integer> offsetsIter = offsets.iterator();
int currentRead = 0;
while ( positionIter.hasNext() ) {
int nextReadToKeep = (Integer)positionIter.next();
// fast-forward to the right read
while ( currentRead < nextReadToKeep ) {
readsIter.next();
offsetsIter.next();
currentRead++;
}
downsampledReads.add(readsIter.next());
downsampledOffsets.add(offsetsIter.next());
currentRead++;
}
reads = downsampledReads;
offsets = downsampledOffsets;
}
/**
* Returns only the reads in ac that do not contain spanning deletions of this locus
*
* @param ac
* @return
*/
public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) {
return subsetDeletions( ac, true );
}
/**
* Returns only the reads in ac that do contain spanning deletions of this locus
*
* @param ac
* @return
*/
public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) {
return subsetDeletions( ac, false );
}
private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) {
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(ac.getReads().size());
ArrayList<Integer> offsets = new ArrayList<Integer>(ac.getReads().size());
for ( int i = 0; i < ac.getReads().size(); i++ ) {
SAMRecord read = ac.getReads().get(i);
int offset = ac.getOffsets().get(i);
if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) {
reads.add(read);
offsets.add(offset);
}
}
return new AlignmentContext(ac.getLocation(), reads, offsets);
pileup = pileup.getDownsampledPileup(coverage);
}
/**

View File

@ -32,6 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.*;
@ -217,8 +219,7 @@ public class LocusIteratorByState extends LocusIterator {
// printState();
//}
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
@ -229,11 +230,9 @@ public class LocusIteratorByState extends LocusIterator {
for ( SAMRecordState state : readStates ) {
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
reads.add(state.getRead());
offsets.add(state.getReadOffset());
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
reads.add(state.getRead());
offsets.add(-1);
pile.add(new PileupElement(state.getRead(), -1));
}
}
GenomeLoc loc = getLocation();
@ -245,10 +244,49 @@ public class LocusIteratorByState extends LocusIterator {
// printState();
//}
// if we got reads with non-D/N over the current position, we are done
if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets);
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile));
}
}
// old implementation -- uses lists of reads and offsets
// public AlignmentContext next() {
// //if (DEBUG) {
// // logger.debug("in Next:");
// // printState();
// //}
//
// ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
// ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
//
// // keep iterating forward until we encounter a reference position that has something "real" hanging over it
// // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
// while(true) {
// collectPendingReads(readInfo.getMaxReadsAtLocus());
//
// // todo -- performance problem -- should be lazy, really
// for ( SAMRecordState state : readStates ) {
// if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
//// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
// reads.add(state.getRead());
// offsets.add(state.getReadOffset());
// } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
// reads.add(state.getRead());
// offsets.add(-1);
// }
// }
// GenomeLoc loc = getLocation();
//
// updateReadStates(); // critical - must be called after we get the current state offsets and location
//
// //if (DEBUG) {
// // logger.debug("DONE WITH NEXT, updating read states, current state is:");
// // printState();
// //}
// // if we got reads with non-D/N over the current position, we are done
// if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets);
// }
// }
private void collectPendingReads(final int maximumPileupSize) {
//if (DEBUG) {
// logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext()));

View File

@ -10,8 +10,10 @@ import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -40,32 +42,32 @@ public class TraverseLocusWindows extends TraversalEngine {
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
AlignmentContext locus = getLocusContext(readView.iterator(), interval);
Pair<GenomeLoc, List<SAMRecord>> locus = getLocusContext(readView.iterator(), interval);
// The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way.
// TODO: Convert this approach to the standard.
GenomeLoc expandedInterval = locus.getLocation();
GenomeLoc expandedInterval = locus.getFirst();
String referenceSubsequence = new String(referenceView.getReferenceBases(expandedInterval));
// Iterate forward to get all reference ordered data covering this interval
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getFirst());
//
// Execute our contract with the walker. Call filter, map, and reduce
//
final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
if (keepMeP) {
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus);
sum = locusWindowWalker.reduce(x, sum);
}
//final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
//if (keepMeP) {
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond());
sum = locusWindowWalker.reduce(x, sum);
//}
printProgress(LOCUS_WINDOW_STRING, locus.getLocation());
printProgress(LOCUS_WINDOW_STRING, locus.getFirst());
return sum;
}
private AlignmentContext getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
private Pair<GenomeLoc, List<SAMRecord>> getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
boolean done = false;
long leftmostIndex = interval.getStart(),
@ -86,11 +88,11 @@ public class TraverseLocusWindows extends TraversalEngine {
}
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
AlignmentContext locus = new AlignmentContext(window, reads, null);
if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage());
// AlignmentContext locus = new AlignmentContext(window, reads, null);
// if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
// locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage());
return locus;
return new Pair<GenomeLoc, List<SAMRecord>>(window, reads);
}
/**

View File

@ -2,6 +2,10 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMRecord;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -12,13 +16,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
*/
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
public abstract class LocusWindowWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, String ref, AlignmentContext context) {
return true; // We are keeping all the intervals
}
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(RefMetaDataTracker tracker, String ref, AlignmentContext context);
public abstract MapType map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List<SAMRecord> reads);
// Given result of map function
public abstract ReduceType reduceInit();

View File

@ -66,14 +66,14 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
ReadBackedPileup pileup = context.getPileup();
String secondBasePileup = "";
if(shouldShowSecondaryBasePileup(pileup))
secondBasePileup = getSecondBasePileup(pileup);
String rods = getReferenceOrderedData( tracker );
out.printf("%s%s %s%n", pileup.getPileupString(qualsAsInts), secondBasePileup, rods);
out.printf("%s%s %s%n", pileup.getPileupString(ref.getBase(), qualsAsInts), secondBasePileup, rods);
return 1;
}

View File

@ -25,11 +25,11 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
public boolean CONTINUE_AFTER_AN_ERROR = false;
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
ReadBackedPileup pileup = context.getPileup();
SAMPileupRecord truePileup = getTruePileup( tracker );
if ( truePileup == null ) {
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(false));
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(ref.getBase(), false));
if ( ! CONTINUE_AFTER_AN_ERROR ) {
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
context.getLocation(), new String(pileup.getBases())));
@ -37,7 +37,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
} else {
String pileupDiff = pileupDiff(pileup, truePileup, true);
if ( pileupDiff != null ) {
out.printf("%s vs. %s%n", pileup.getPileupString(true), truePileup.getPileupString());
out.printf("%s vs. %s%n", pileup.getPileupString(ref.getBase(), true), truePileup.getPileupString());
if ( ! CONTINUE_AFTER_AN_ERROR ) {
throw new RuntimeException(String.format("Pileups aren't equal: %s", pileupDiff));
}

View File

@ -29,7 +29,7 @@ public class SecondBaseSkew implements VariantAnnotation {
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileupWithDel, Variation variation, List<Genotype> genotypes) {
ReadBackedPileup pileup = pileupWithDel; // .getPileupWithoutDeletions();
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes);
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup,genotypes);
if ( depthProp == null ) {
return null;
} else {
@ -48,10 +48,10 @@ public class SecondBaseSkew implements VariantAnnotation {
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
}
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(ReadBackedPileup p, List<Genotype> genotypes) {
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(char ref, ReadBackedPileup p, List<Genotype> genotypes) {
char snp;
try {
snp = getNonref(genotypes, p.getRef());
snp = getNonref(genotypes, ref);
} catch ( IllegalStateException e ) {
// tri-allelic site
// System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")");
@ -67,7 +67,7 @@ public class SecondBaseSkew implements VariantAnnotation {
if ( BaseUtils.isRegularBase((char)sbase) && BaseUtils.basesAreEqual(pbase, (byte) snp) ) {
variantDepth++;
if ( BaseUtils.basesAreEqual(sbase, (byte)p.getRef()) ) {
if ( BaseUtils.basesAreEqual(sbase, (byte)ref) ) {
variantsWithRefSecondBase++;
}
}

View File

@ -187,8 +187,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List<Genotype> genotypes, Collection<VariantAnnotation> annotations) {
// set up the pileup for the full collection of reads at this position
ReadBackedPileup fullPileup = new ReadBackedPileup(ref.getBase(), context);
ReadBackedPileup fullPileup = context.getPileup();
// todo -- reimplement directly using ReadBackedPileups, which is vastly more efficient
// also, set up the pileup for the mapping-quality-zero-free context
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
@ -204,7 +205,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
MQ0freeOffsets.add(offset);
}
}
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), MQ0freeReads, MQ0freeOffsets);
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), MQ0freeReads, MQ0freeOffsets);
HashMap<String, String> results = new HashMap<String, String>();

View File

@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
int index = 0;
for ( String sample : contexts.keySet() ) {
AlignmentContextBySample context = contexts.get(sample);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
@ -82,7 +82,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc);
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {

View File

@ -96,7 +96,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {

View File

@ -108,7 +108,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(char ref, AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) {
// create the pileup
AlignmentContext myContext = sampleContext.getContext(contextType);
ReadBackedPileup pileup = new ReadBackedPileup(ref, myContext);
ReadBackedPileup pileup = myContext.getPileup();
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
@ -132,7 +132,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
for ( String sample : contexts.keySet() ) {
AlignmentContextBySample context = contexts.get(sample);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);

View File

@ -77,7 +77,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref);
int altIndex = BaseUtils.simpleBaseToBaseIndex(alt);

View File

@ -106,8 +106,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
}
}
public Integer map(RefMetaDataTracker tracker, String ref, AlignmentContext context) {
List<SAMRecord> reads = context.getReads();
public Integer map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List<SAMRecord> reads) {
ArrayList<SAMRecord> goodReads = new ArrayList<SAMRecord>();
for ( SAMRecord read : reads ) {
if ( !read.getReadUnmappedFlag() &&
@ -121,7 +120,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
readsToWrite.add(new ComparableSAMRecord(read));
}
clean(goodReads, ref, context.getLocation());
clean(goodReads, ref, loc);
//bruteForceClean(goodReads, ref, context.getLocation().getStart());
//testCleanWithDeletion();
//testCleanWithInsertion();

View File

@ -76,7 +76,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
}
public Set<BaseTransitionTable> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(),context);
ReadBackedPileup pileup = context.getPileup();
Set<BaseTransitionTable> newCounts = null;
//System.out.println(pileup.getBases());
if ( baseIsUsable(tracker, ref, pileup, context) ) {
@ -279,7 +279,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
}
if ( usePileupMismatches ) {
conditions.add(countMismatches(pileup));
conditions.add(countMismatches(ref.getBase(), pileup));
}
if ( useReadGroup ) {
@ -331,8 +331,8 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
return String.format("%s\t%s%n",header,"Counts");
}
public int countMismatches(ReadBackedPileup p) {
int refM = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(p.getRef())];
public int countMismatches(char ref, ReadBackedPileup p) {
int refM = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(ref)];
return p.size()-refM;
}
@ -345,7 +345,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
}
public boolean pileupBelowMismatchThreshold( ReferenceContext ref, ReadBackedPileup pileup ) {
return countMismatches(pileup) <= maxNumMismatches;
return countMismatches(ref.getBase(), pileup) <= maxNumMismatches;
}
public boolean pileupContainsNoNs(ReadBackedPileup pileup) {

View File

@ -318,7 +318,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
GenomeLoc Gloc = context.getLocation();
//Create pileup of reads at this locus
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
ReadBackedPileup pileup = context.getPileup();
long loc = context.getPosition();
if( context.getReads().size() > 0 ) {

View File

@ -77,7 +77,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
}
int numVariantAllele = alleleFreqInfo.getSecond().getFirst();
int numChipsObserved = alleleFreqInfo.getSecond().getSecond();
int depth = context.numReads();
int depth = context.size();
double power = powerWalker.calculatePowerAtFrequency(context,numVariantAllele);
int called;
Variation call = (Variation) tracker.lookup("calls",null);
@ -89,7 +89,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
called = 1;
}
ReadBackedPileup p = new ReadBackedPileup(ref.getBase(),context);
ReadBackedPileup p = context.getPileup();
int support = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(alternate)];
// sanity check

View File

@ -69,7 +69,7 @@ public class MinimumNQSWalker extends LocusWalker<Pair<List<Pair<Integer,Integer
ArrayList<Pair<Integer,Integer>> matchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
ArrayList<Pair<Integer,Integer>> mismatchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
if ( (Variation) tracker.lookup("dbsnp",null) == null ) {
for ( int r = 0; r < context.numReads(); r ++ ) {
for ( int r = 0; r < context.size(); r ++ ) {
SAMRecord read = context.getReads().get(r);
int offset = context.getOffsets().get(r);
int quality = read.getBaseQualities()[offset];

View File

@ -69,7 +69,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
int altCount = 0;
int totalCount = 0;
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
ReadBackedPileup pileup = context.getPileup();
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
for (byte base : pileup.getBases() ) {
@ -108,7 +108,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
String colName = String.format("%s.%d", context.getContig(), context.getPosition());
for (int i = 0; i < context.numReads(); i++) {
for (int i = 0; i < context.size(); i++) {
SAMRecord read = context.getReads().get(i);
int offset = context.getOffsets().get(i);

View File

@ -42,7 +42,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets());
ReadBackedPileup pileup = context.getPileup();
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);

View File

@ -111,7 +111,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
}
public double calculatePowerAtFrequency( AlignmentContext context, int alleles ) {
return theoreticalPower( context.numReads(), getMeanQ(context), alleles, lodThresh );
return theoreticalPower( context.size(), getMeanQ(context), alleles, lodThresh );
}
public byte getMeanQ( AlignmentContext context ) {
@ -126,7 +126,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
}
public double expectedMatchRate(AlignmentContext context) {
int nReads = context.numReads();
int nReads = context.size();
double matches = 0.0;
for ( int r = 0; r < nReads; r ++ ) {
matches += QualityUtils.qualToProb(context.getReads().get(r).getBaseQualities()[context.getOffsets().get(r)]);

View File

@ -18,8 +18,11 @@ import java.util.Random;
abstract public class BasicPileup {
public static final char DELETION_CHAR = 'D';
@Deprecated
abstract GenomeLoc getLocation();
@Deprecated
abstract char getRef();
@Deprecated
abstract int size();
@ -28,6 +31,7 @@ abstract public class BasicPileup {
*
* @return
*/
@Deprecated
byte[] getBases() { return null; }
/**
@ -35,6 +39,7 @@ abstract public class BasicPileup {
*
* @return
*/
@Deprecated
byte[] getQuals() { return null; }
/**
@ -57,50 +62,6 @@ abstract public class BasicPileup {
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
}
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
StringBuilder bases = new StringBuilder();
for ( byte base : getBasesAsArrayList(reads, offsets)) {
bases.append((char)base);
}
return bases.toString();
}
public static String baseWithStrandPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
StringBuilder bases = new StringBuilder();
for ( int i = 0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
char base;
if ( offset == -1 ) {
base = DELETION_CHAR;
} else {
base = (char) read.getReadBases()[offset];
}
base = Character.toUpperCase(base);
if (read.getReadNegativeStrandFlag()) {
base = Character.toLowerCase(base);
}
bases.append(base);
}
return bases.toString();
}
//
// byte[] methods
//
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
return getBasesAsArray(reads,offsets);
}
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
return getQualsAsArray( reads, offsets );
}
//
// ArrayList<Byte> methods
//
@ -119,7 +80,7 @@ abstract public class BasicPileup {
return array;
}
@Deprecated
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
for (byte value : getBasesAsArray(reads, offsets))
@ -127,6 +88,7 @@ abstract public class BasicPileup {
return bases;
}
@Deprecated
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for (byte value : getQualsAsArray(reads, offsets))
@ -134,6 +96,7 @@ abstract public class BasicPileup {
return quals;
}
@Deprecated
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
byte array[] = new byte[reads.size()];
int index = 0;
@ -151,6 +114,7 @@ abstract public class BasicPileup {
return array;
}
@Deprecated
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
for ( int i = 0; i < reads.size(); i++ ) {
@ -161,73 +125,6 @@ abstract public class BasicPileup {
return quals;
}
public static String mappingQualPileupAsString( List<SAMRecord> reads) {
return quals2String(mappingQualPileup(reads));
}
public static String quals2String( List<Byte> quals ) {
StringBuilder qualStr = new StringBuilder();
for ( int qual : quals ) {
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
qualStr.append(qualChar);
}
return qualStr.toString();
}
public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
return quals2String(getQualsAsArrayList(reads, offsets));
}
/**
 * Collects the secondary (SQ/E2) base calls for the piled-up positions.
 *
 * @param reads   reads contributing to the pileup
 * @param offsets per-read offsets of the piled-up base
 * @return the secondary bases in read order, or null if no read carried a
 *         recognizable secondary base annotation
 */
public static ArrayList<Byte> getSecondaryBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
    final int n = reads.size();
    ArrayList<Byte> secondaryBases = new ArrayList<Byte>(n);
    boolean sawAnnotatedBase = false;

    for ( int i = 0; i < n; i++ ) {
        byte second = BaseUtils.getSecondBase(reads.get(i), offsets.get(i));
        if ( BaseUtils.simpleBaseToBaseIndex((char)second) != -1 )
            sawAnnotatedBase = true;
        secondaryBases.add(second);
    }

    return sawAnnotatedBase ? secondaryBases : null;
}
/**
 * Renders the secondary bases of the pileup as a string, sanity-checking them against
 * the primary bases along the way.
 *
 * @param reads   reads contributing to the pileup
 * @param offsets per-read offsets of the piled-up base
 * @return the secondary bases as a string, or null if no secondary base annotations exist
 * @throws StingException if primary/secondary lists disagree in size, or if a secondary
 *                        base equals its primary base (both indicate conversion bugs)
 */
public static String getSecondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
    ArrayList<Byte> secondary = getSecondaryBasesAsArrayList(reads, offsets);
    if ( secondary == null )
        return null;

    ArrayList<Byte> primary = getBasesAsArrayList(reads, offsets);
    if ( secondary.size() != primary.size() )
        throw new StingException("BUG in conversion of secondary bases: primary and secondary base vectors are different sizes!");

    StringBuilder out = new StringBuilder(secondary.size());
    for ( int i = 0; i < secondary.size(); i++ ) {
        byte primaryBase = primary.get(i);
        byte secondaryBase = secondary.get(i);

        // a secondary base identical to the primary call means the annotation was mis-parsed
        if ( secondaryBase == primaryBase )
            throw new StingException("BUG in conversion of secondary bases!");

        out.append((char) secondaryBase);
    }

    return out.toString();
}
@Deprecated // todo -- delete me
public static String[] indelPileup( List<SAMRecord> reads, List<Integer> offsets )
{

View File

@ -8,9 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.*;
import net.sf.samtools.SAMRecord;
@ -21,26 +19,74 @@ import net.sf.samtools.SAMRecord;
*/
public class ReadBackedPileup implements Iterable<PileupElement> {
private GenomeLoc loc = null;
private char ref = 0;
private ArrayList<PileupElement> pileup = null;
private int size = 0; // cached value of the size of the pileup
private int nDeletions = 0; // cached value of the number of deletions
private int[] counts = new int[4]; // cached value of counts
public ReadBackedPileup(char ref, AlignmentContext context ) {
this(context.getLocation(), ref, context.getReads(), context.getOffsets());
/**
 * Create a new version of a read backed pileup at loc, using the reads and their corresponding
 * offsets. This pileup will contain a list, in order of the reads, of the piled bases at
 * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
 * go changing the reads.
 *
 * @param loc     the genomic location of this pileup; must not be null
 * @param reads   the reads covering loc, in pileup order
 * @param offsets for each read, the offset into its bases corresponding to loc
 *                (-1 appears to mark deletion-spanning reads — TODO confirm)
 */
public ReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
    this(loc, readsOffsets2Pileup(reads, offsets));
}
public ReadBackedPileup(GenomeLoc loc, char ref, List<SAMRecord> reads, List<Integer> offsets ) {
this(loc, ref, readsOffsets2Pileup(reads, offsets));
}
/**
 * Create a new version of a read backed pileup at loc, directly from a list of
 * PileupElements. This lower level constructor assumes pileup is well-formed and merely
 * keeps a pointer to pileup. Don't go changing the data in pileup.
 *
 * @param loc    the genomic location of this pileup; must not be null
 * @param pileup the elements of the pileup; must not be null
 * @throws StingException if either argument is null
 */
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup ) {
    // fixed stale "ReadBackedPileup2" class name in the error messages, and removed
    // the leftover no-op "this.ref = ref;" from the old (loc, ref, pileup) constructor
    if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup");
    if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup");

    this.loc = loc;
    this.pileup = pileup;

    // compute size / nDeletions / base counts once, up front
    calculatedCachedData();
}
/**
 * Computes and caches size, nDeletions, and the per-base counts for this pileup.
 * Paying this O(n) cost once at construction makes repeated calls to size(),
 * getNumberOfDeletions(), etc. cheap.
 */
private void calculatedCachedData() {
    size = 0;
    nDeletions = 0;
    Arrays.fill(counts, 0);

    for ( PileupElement element : this ) {
        size++;

        if ( element.isDeletion() ) {
            nDeletions++;
        } else {
            final int baseIndex = BaseUtils.simpleBaseToBaseIndex((char)element.getBase());
            // non-ACGT bases (index == -1) are simply not counted
            if ( baseIndex != -1 )
                counts[baseIndex]++;
        }
    }
}
/**
* Helper routine for converting reads and offset lists to a PileupElement list.
*
* @param reads
* @param offsets
* @return
*/
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2");
@ -54,25 +100,19 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
return pileup;
}
// --------------------------------------------------------
//
// Special 'constructors'
//
public Iterator<PileupElement> iterator() {
return pileup.iterator();
}
// todo -- reimplement efficiently
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
int i = 0;
for ( PileupElement pile : this ) {
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
}
return new IterableIterator<ExtendedPileupElement>(x.iterator());
}
// --------------------------------------------------------
/**
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
* of the pileup (just returns this) if there are no deletions in the pileup.
*
* @return
*/
public ReadBackedPileup getPileupWithoutDeletions() {
// todo -- fixme
if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions
@ -85,80 +125,144 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
newOffsets.add(getOffsets().get(i));
}
}
return new ReadBackedPileup(loc, ref, newReads, newOffsets);
return new ReadBackedPileup(loc, newReads, newOffsets);
} else {
return this;
}
}
public int getNumberOfDeletions() {
int n = 0;
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*
* @param desiredCoverage
* @return
*/
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
if ( size() <= desiredCoverage )
return this;
for ( int i = 0; i < size(); i++ ) {
if ( getOffsets().get(i) != -1 ) { n++; }
// randomly choose numbers corresponding to positions in the reads list
Random generator = new Random();
TreeSet<Integer> positions = new TreeSet<Integer>();
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
if ( positions.add(generator.nextInt(pileup.size())) )
i++;
}
return n;
Iterator positionIter = positions.iterator();
ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>();
while ( positionIter.hasNext() ) {
int nextReadToKeep = (Integer)positionIter.next();
downsampledPileup.add(pileup.get(nextReadToKeep));
}
return new ReadBackedPileup(getLocation(), downsampledPileup);
}
// --------------------------------------------------------
//
// iterators
//
// --------------------------------------------------------
/**
 * The best way to access PileupElements where you only care about the bases and quals in the pileup.
 *
 * for (PileupElement p : this) { doSomething(p); }
 *
 * Provides efficient iteration of the data.
 *
 * @return an iterator over the PileupElements of this pileup, in pileup order
 */
public Iterator<PileupElement> iterator() {
    return pileup.iterator();
}
/**
 * The best way to access PileupElements when you care not only about the bases and quals
 * in the pileup but also need the index of each element within the pile.
 *
 * for (ExtendedPileupElement p : this.extendedForeachIterator()) { doSomething(p); }
 *
 * @return an iterable over extended pileup elements carrying their pileup index
 */
// todo -- reimplement efficiently
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
    ArrayList<ExtendedPileupElement> elements = new ArrayList<ExtendedPileupElement>(size());

    int index = 0;
    for ( PileupElement element : this )
        elements.add(new ExtendedPileupElement(element.getRead(), element.getOffset(), index++, this));

    return new IterableIterator<ExtendedPileupElement>(elements.iterator());
}
/**
 * Simple useful routine to count the number of deletion bases in this pileup.
 * Returns the value cached by calculatedCachedData(), so this is O(1).
 *
 * @return the number of deletion elements in this pileup
 */
public int getNumberOfDeletions() {
    // removed the commented-out O(n) implementation that was kept below this method
    return nDeletions;
}
/**
 * @return the number of elements in this pileup (cached at construction, so O(1))
 */
public int size() {
    // removed the stale "todo -- optimize me" and the dead "return pileup.size()" line:
    // the value is already cached by calculatedCachedData()
    return size;
}
/**
 * @return the genomic location of this pileup
 */
public GenomeLoc getLocation() {
    return loc;
}
// NOTE(review): appears to be a leftover from an older version of this class — the ref
// base now travels as a parameter (see getPileupString(char ref, ...)); confirm whether
// this accessor and the backing ref field are still needed.
public char getRef() {
    return ref;
}
public List<SAMRecord> getReads() {
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
return reads;
}
public List<Integer> getOffsets() {
List<Integer> offsets = new ArrayList<Integer>(size());
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
return offsets;
}
public byte[] getBases() {
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
return v;
}
public byte[] getSecondaryBases() {
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
return v;
}
public byte[] getQuals() {
byte[] v = new byte[size()];
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
return v;
}
/**
 * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
 * to BaseUtils.simpleBaseToBaseIndex for each base.
 *
 * Deletion elements and non-ACGT bases (e.g. N) are not counted.
 *
 * @return a freshly allocated int[4] of base counts
 */
public int[] getBaseCounts() {
    int[] baseCounts = new int[4];

    for ( PileupElement pile : this ) {
        // skip deletion sites
        if ( ! pile.isDeletion() ) {
            char base = Character.toUpperCase((char)(pile.getBase()));
            // look up the index once (the old code called simpleBaseToBaseIndex twice),
            // and removed the commented-out previous implementation kept below
            int index = BaseUtils.simpleBaseToBaseIndex(base);
            if ( index != -1 )
                baseCounts[index]++;
        }
    }

    return baseCounts;
}
/**
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
* @return
*/
public boolean hasSecondaryBases() {
for ( PileupElement pile : this ) {
// skip deletion sites
@ -169,14 +273,69 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
return false;
}
public String getPileupString(boolean qualsAsInts) {
public String getPileupString(char ref, boolean qualsAsInts) {
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
//return String.format("%s %s %s %s", getLocation(), getRef(), getBases(), getQuals());
return String.format("%s %s %s %s",
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
getRef(), // reference base
ref, // reference base
new String(getBases()));
}
// --------------------------------------------------------
//
// Convenience functions that may be slow
//
// --------------------------------------------------------
/**
 * Returns a list of the reads in this pileup, in pileup order.
 * Note this call costs O(n) and allocates a fresh list each time.
 *
 * @return a freshly allocated list of the reads
 */
public List<SAMRecord> getReads() {
    List<SAMRecord> result = new ArrayList<SAMRecord>(size());
    for ( PileupElement element : this )
        result.add(element.getRead());
    return result;
}
/**
 * Returns a list of the read offsets in this pileup, in pileup order.
 * Note this call costs O(n) and allocates a fresh list each time.
 *
 * @return a freshly allocated list of the offsets
 */
public List<Integer> getOffsets() {
    List<Integer> result = new ArrayList<Integer>(size());
    for ( PileupElement element : this )
        result.add(element.getOffset());
    return result;
}
/**
 * Returns an array of the bases in this pileup, in pileup order.
 * Note this call costs O(n) and allocates a fresh array each time.
 *
 * @return a freshly allocated array of the bases
 */
public byte[] getBases() {
    byte[] bases = new byte[size()];
    int i = 0;
    for ( PileupElement element : this )
        bases[i++] = element.getBase();
    return bases;
}
/**
 * Returns an array of the secondary bases in this pileup, in pileup order.
 * Note this call costs O(n) and allocates a fresh array each time.
 *
 * @return a freshly allocated array of the secondary bases
 */
public byte[] getSecondaryBases() {
    byte[] result = new byte[size()];
    for ( ExtendedPileupElement element : this.extendedForeachIterator() )
        result[element.getPileupOffset()] = element.getSecondBase();
    return result;
}
/**
 * Returns an array of the base qualities in this pileup, in pileup order.
 * Note this call costs O(n) and allocates a fresh array each time.
 *
 * @return a freshly allocated array of the quals
 */
public byte[] getQuals() {
    byte[] result = new byte[size()];
    for ( ExtendedPileupElement element : this.extendedForeachIterator() )
        result[element.getPileupOffset()] = element.getQual();
    return result;
}
}