From db40e28e542c932ce2cdbf06dc20f5b0fc408565 Mon Sep 17 00:00:00 2001 From: depristo Date: Wed, 25 Nov 2009 20:54:44 +0000 Subject: [PATCH] ReadBackedPileup in all its glory. Documented, aligned with the output of LocusIteratorByState, and caching common outputs for performance git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2165 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/contexts/AlignmentContext.java | 126 ++----- .../gatk/iterators/LocusIteratorByState.java | 52 ++- .../gatk/traversals/TraverseLocusWindows.java | 30 +- .../sting/gatk/walkers/LocusWindowWalker.java | 11 +- .../sting/gatk/walkers/PileupWalker.java | 4 +- .../gatk/walkers/ValidatingPileupWalker.java | 6 +- .../walkers/annotator/SecondBaseSkew.java | 8 +- .../walkers/annotator/VariantAnnotator.java | 5 +- .../DiploidGenotypeCalculationModel.java | 4 +- .../genotyper/EMGenotypeCalculationModel.java | 2 +- ...PointEstimateGenotypeCalculationModel.java | 4 +- .../genotyper/PooledCalculationModel.java | 2 +- .../walkers/indels/IntervalCleanerWalker.java | 5 +- ...seTransitionTableCalculatorJavaWalker.java | 10 +- .../gatk/walkers/HLAcaller/CallHLAWalker.java | 2 +- .../walkers/HapmapPoolAllelicInfoWalker.java | 4 +- .../Recalibration/MinimumNQSWalker.java | 2 +- .../FindContaminatingReadGroupsWalker.java | 4 +- .../papergenotyper/GATKPaperGenotyper.java | 2 +- .../poolseq/PowerBelowFrequencyWalker.java | 4 +- .../sting/utils/BasicPileup.java | 121 +------ .../sting/utils/pileup/ReadBackedPileup.java | 325 +++++++++++++----- 22 files changed, 385 insertions(+), 348 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 9e3fbec81..e5fbe7574 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.contexts; import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; @@ -42,8 +44,7 @@ import java.util.*; */ public class AlignmentContext { protected GenomeLoc loc = null; - protected List reads = null; - protected List offsets = null; + protected ReadBackedPileup pileup = null; /** * The number of bases we've skipped over in the reference since the last map invocation. @@ -65,31 +66,45 @@ public class AlignmentContext { * @param reads * @param offsets */ + @Deprecated public AlignmentContext(GenomeLoc loc, List reads, List offsets) { - //assert loc != null; - //assert loc.getContig() != null; - //assert reads != null; - //assert offsets != null; - - this.loc = loc; - this.reads = reads; - this.offsets = offsets; + this(loc, reads, offsets, 0); } + @Deprecated public AlignmentContext(GenomeLoc loc, List reads, List offsets, long skippedBases ) { + if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null"); + if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context"); + this.loc = loc; - this.reads = reads; - this.offsets = offsets; + this.pileup = new ReadBackedPileup(loc, reads, offsets); this.skippedBases = skippedBases; } + public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup ) { + this(loc, pileup, 0); + } + + + public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup, long skippedBases ) { + if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null"); + if ( pileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null"); + if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context"); + + this.loc = loc; + this.pileup = pileup; + this.skippedBases = skippedBases; + } + + public ReadBackedPileup getPileup() { return pileup; } /** * get all of the reads within this context * * @return */ - public List getReads() { return reads; } + @Deprecated + public List getReads() { return pileup.getReads(); } /** * Are there any reads associated with this locus? @@ -97,16 +112,15 @@ public class AlignmentContext { * @return */ public boolean hasReads() { - return reads != null; + return pileup.size() > 0; } /** * How many reads cover this locus? * @return */ - public int numReads() { - assert( reads != null ); - return reads.size(); + public int size() { + return pileup.size(); } /** @@ -114,89 +128,17 @@ public class AlignmentContext { * * @return */ + @Deprecated public List getOffsets() { - return offsets; + return pileup.getOffsets(); } public String getContig() { return getLocation().getContig(); } public long getPosition() { return getLocation().getStart(); } public GenomeLoc getLocation() { return loc; } - //public void setLocation(GenomeLoc loc) { - // this.loc = loc.clone(); - //} - public void downsampleToCoverage(int coverage) { - if ( numReads() <= coverage ) - return; - - // randomly choose numbers corresponding to positions in the reads list - Random generator = new Random(); - TreeSet positions = new TreeSet(); - int i = 0; - while ( i < coverage ) { - if (positions.add(new Integer(generator.nextInt(reads.size())))) - i++; - } - - ArrayList downsampledReads = new ArrayList(); - ArrayList downsampledOffsets = new ArrayList(); - Iterator positionIter = positions.iterator(); - Iterator readsIter = reads.iterator(); - Iterator offsetsIter = offsets.iterator(); - int currentRead = 0; - while ( positionIter.hasNext() ) { - int nextReadToKeep = (Integer)positionIter.next(); - - // fast-forward to the right read - while ( currentRead < nextReadToKeep ) { - readsIter.next(); - offsetsIter.next(); - currentRead++; - } - - downsampledReads.add(readsIter.next()); - downsampledOffsets.add(offsetsIter.next()); - currentRead++; - } - - reads = downsampledReads; - offsets = downsampledOffsets; - } - - /** - * Returns only the reads in ac that do not contain spanning deletions of this locus - * - * @param ac - * @return - */ - public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) { - return subsetDeletions( ac, true ); - } - - /** - * Returns only the reads in ac that do contain spanning deletions of this locus - * - * @param ac - * @return - */ - public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) { - return subsetDeletions( ac, false ); - } - - private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) { - ArrayList reads = new ArrayList(ac.getReads().size()); - ArrayList offsets = new ArrayList(ac.getReads().size()); - for ( int i = 0; i < ac.getReads().size(); i++ ) { - SAMRecord read = ac.getReads().get(i); - int offset = ac.getOffsets().get(i); - if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) { - reads.add(read); - offsets.add(offset); - } - } - - return new AlignmentContext(ac.getLocation(), reads, offsets); + pileup = pileup.getDownsampledPileup(coverage); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index baf1f2e90..e1aff00f3 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -32,6 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; @@ -217,8 +219,7 @@ public class LocusIteratorByState extends LocusIterator { // printState(); //} - ArrayList reads = new ArrayList(readStates.size()); - ArrayList offsets = new ArrayList(readStates.size()); + ArrayList pile = new ArrayList(readStates.size()); // keep iterating forward until we encounter a reference position that has something "real" hanging over it // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true) @@ -229,11 +230,9 @@ public class LocusIteratorByState extends LocusIterator { for ( SAMRecordState state : readStates ) { if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { // System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset()); - reads.add(state.getRead()); - offsets.add(state.getReadOffset()); + pile.add(new PileupElement(state.getRead(), state.getReadOffset())); } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { - reads.add(state.getRead()); - offsets.add(-1); + pile.add(new PileupElement(state.getRead(), -1)); } } GenomeLoc loc = getLocation(); @@ -245,10 +244,49 @@ public class LocusIteratorByState extends LocusIterator { // printState(); //} // if we got reads with non-D/N over the current position, we are done - if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets); + if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile)); } } + // old implementation -- uses lists of reads and offsets +// public AlignmentContext next() { +// //if (DEBUG) { +// // logger.debug("in Next:"); +// // printState(); +// //} +// +// ArrayList reads = new ArrayList(readStates.size()); +// ArrayList offsets = new ArrayList(readStates.size()); +// +// // keep iterating forward until we encounter a reference position that has something "real" hanging over it +// // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true) +// while(true) { +// collectPendingReads(readInfo.getMaxReadsAtLocus()); +// +// // todo -- performance problem -- should be lazy, really +// for ( SAMRecordState state : readStates ) { +// if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) { +//// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset()); +// reads.add(state.getRead()); +// offsets.add(state.getReadOffset()); +// } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { +// reads.add(state.getRead()); +// offsets.add(-1); +// } +// } +// GenomeLoc loc = getLocation(); +// +// updateReadStates(); // critical - must be called after we get the current state offsets and location +// +// //if (DEBUG) { +// // logger.debug("DONE WITH NEXT, updating read states, current state is:"); +// // printState(); +// //} +// // if we got reads with non-D/N over the current position, we are done +// if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets); +// } +// } + private void collectPendingReads(final int maximumPileupSize) { //if (DEBUG) { // logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext())); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java index 9b345018d..5e924f93d 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java @@ -10,8 +10,10 @@ import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Pair; import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. @@ -40,32 +42,32 @@ public class TraverseLocusWindows extends TraversalEngine { LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider ); - AlignmentContext locus = getLocusContext(readView.iterator(), interval); + Pair> locus = getLocusContext(readView.iterator(), interval); // The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way. // TODO: Convert this approach to the standard. - GenomeLoc expandedInterval = locus.getLocation(); + GenomeLoc expandedInterval = locus.getFirst(); String referenceSubsequence = new String(referenceView.getReferenceBases(expandedInterval)); // Iterate forward to get all reference ordered data covering this interval - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation()); + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getFirst()); // // Execute our contract with the walker. Call filter, map, and reduce // - final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus); - if (keepMeP) { - M x = locusWindowWalker.map(tracker, referenceSubsequence, locus); - sum = locusWindowWalker.reduce(x, sum); - } + //final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus); + //if (keepMeP) { + M x = locusWindowWalker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond()); + sum = locusWindowWalker.reduce(x, sum); + //} - printProgress(LOCUS_WINDOW_STRING, locus.getLocation()); + printProgress(LOCUS_WINDOW_STRING, locus.getFirst()); return sum; } - private AlignmentContext getLocusContext(StingSAMIterator readIter, GenomeLoc interval) { + private Pair> getLocusContext(StingSAMIterator readIter, GenomeLoc interval) { ArrayList reads = new ArrayList(); boolean done = false; long leftmostIndex = interval.getStart(), @@ -86,11 +88,11 @@ public class TraverseLocusWindows extends TraversalEngine { } GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex); - AlignmentContext locus = new AlignmentContext(window, reads, null); - if ( readIter.getSourceInfo().getDownsampleToCoverage() != null ) - locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage()); +// AlignmentContext locus = new AlignmentContext(window, reads, null); +// if ( readIter.getSourceInfo().getDownsampleToCoverage() != null ) +// locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage()); - return locus; + return new Pair>(window, reads); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWindowWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWindowWalker.java index 80520dd76..92478e94e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWindowWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWindowWalker.java @@ -2,6 +2,10 @@ package org.broadinstitute.sting.gatk.walkers; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.utils.GenomeLoc; +import net.sf.samtools.SAMRecord; + +import java.util.List; /** * Created by IntelliJ IDEA. @@ -12,13 +16,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; */ @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) public abstract class LocusWindowWalker extends Walker { - // Do we actually want to operate on the context? - public boolean filter(RefMetaDataTracker tracker, String ref, AlignmentContext context) { - return true; // We are keeping all the intervals - } - // Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext - public abstract MapType map(RefMetaDataTracker tracker, String ref, AlignmentContext context); + public abstract MapType map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List reads); // Given result of map function public abstract ReduceType reduceInit(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 76608cedd..0c851488b 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -66,14 +66,14 @@ public class PileupWalker extends LocusWalker implements TreeR } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context); + ReadBackedPileup pileup = context.getPileup(); String secondBasePileup = ""; if(shouldShowSecondaryBasePileup(pileup)) secondBasePileup = getSecondBasePileup(pileup); String rods = getReferenceOrderedData( tracker ); - out.printf("%s%s %s%n", pileup.getPileupString(qualsAsInts), secondBasePileup, rods); + out.printf("%s%s %s%n", pileup.getPileupString(ref.getBase(), qualsAsInts), secondBasePileup, rods); return 1; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java index bd0a69493..9cdeb6851 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ValidatingPileupWalker.java @@ -25,11 +25,11 @@ public class ValidatingPileupWalker extends LocusWalker annotate(ReferenceContext ref, ReadBackedPileup pileupWithDel, Variation variation, List genotypes) { ReadBackedPileup pileup = pileupWithDel; // .getPileupWithoutDeletions(); - Pair depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes); + Pair depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup,genotypes); if ( depthProp == null ) { return null; } else { @@ -48,10 +48,10 @@ public class SecondBaseSkew implements VariantAnnotation { return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) ); } - private Pair getSecondaryPileupNonrefEstimator(ReadBackedPileup p, List genotypes) { + private Pair getSecondaryPileupNonrefEstimator(char ref, ReadBackedPileup p, List genotypes) { char snp; try { - snp = getNonref(genotypes, p.getRef()); + snp = getNonref(genotypes, ref); } catch ( IllegalStateException e ) { // tri-allelic site // System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")"); @@ -67,7 +67,7 @@ public class SecondBaseSkew implements VariantAnnotation { if ( BaseUtils.isRegularBase((char)sbase) && BaseUtils.basesAreEqual(pbase, (byte) snp) ) { variantDepth++; - if ( BaseUtils.basesAreEqual(sbase, (byte)p.getRef()) ) { + if ( BaseUtils.basesAreEqual(sbase, (byte)ref) ) { variantsWithRefSecondBase++; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 010af5ce8..89905a9fc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -187,8 +187,9 @@ public class VariantAnnotator extends RodWalker { public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List genotypes, Collection annotations) { // set up the pileup for the full collection of reads at this position - ReadBackedPileup fullPileup = new ReadBackedPileup(ref.getBase(), context); + ReadBackedPileup fullPileup = context.getPileup(); + // todo -- reimplement directly using ReadBackedPileups, which is vastly more efficient // also, set up the pileup for the mapping-quality-zero-free context List reads = context.getReads(); List offsets = context.getOffsets(); @@ -204,7 +205,7 @@ public class VariantAnnotator extends RodWalker { MQ0freeOffsets.add(offset); } } - ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), MQ0freeReads, MQ0freeOffsets); + ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), MQ0freeReads, MQ0freeOffsets); HashMap results = new HashMap(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index c96f3dd90..d04334b0c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul int index = 0; for ( String sample : contexts.keySet() ) { AlignmentContextBySample context = contexts.get(sample); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); + ReadBackedPileup pileup = context.getContext(contextType).getPileup(); // create the GenotypeLikelihoods object GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); @@ -82,7 +82,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc); if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL)); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 3452f33fe..4e3ea057c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -96,7 +96,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation()); if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL)); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 1f740eef5..16b44008c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -108,7 +108,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation private Pair getSingleSampleLikelihoods(char ref, AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) { // create the pileup AlignmentContext myContext = sampleContext.getContext(contextType); - ReadBackedPileup pileup = new ReadBackedPileup(ref, myContext); + ReadBackedPileup pileup = myContext.getPileup(); // create the GenotypeLikelihoods object GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); @@ -132,7 +132,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation for ( String sample : contexts.keySet() ) { AlignmentContextBySample context = contexts.get(sample); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); + ReadBackedPileup pileup = context.getContext(contextType).getPileup(); // create the GenotypeLikelihoods object GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index 3cd452fe1..ba38be29c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -77,7 +77,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap contexts, StratifiedContext contextType) { AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType)); + ReadBackedPileup pileup = context.getContext(contextType).getPileup(); int refIndex = BaseUtils.simpleBaseToBaseIndex(ref); int altIndex = BaseUtils.simpleBaseToBaseIndex(alt); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java index 56be3e39e..99c29243b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java @@ -106,8 +106,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker } } - public Integer map(RefMetaDataTracker tracker, String ref, AlignmentContext context) { - List reads = context.getReads(); + public Integer map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List reads) { ArrayList goodReads = new ArrayList(); for ( SAMRecord read : reads ) { if ( !read.getReadUnmappedFlag() && @@ -121,7 +120,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker readsToWrite.add(new ComparableSAMRecord(read)); } - clean(goodReads, ref, context.getLocation()); + clean(goodReads, ref, loc); //bruteForceClean(goodReads, ref, context.getLocation().getStart()); //testCleanWithDeletion(); //testCleanWithInsertion(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index 1d543f6c3..ab17f1fb2 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -76,7 +76,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { - ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(),context); + ReadBackedPileup pileup = context.getPileup(); Set newCounts = null; //System.out.println(pileup.getBases()); if ( baseIsUsable(tracker, ref, pileup, context) ) { @@ -279,7 +279,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker>{ GenomeLoc Gloc = context.getLocation(); //Create pileup of reads at this locus - ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context); + ReadBackedPileup pileup = context.getPileup(); long loc = context.getPosition(); if( context.getReads().size() > 0 ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoWalker.java index 22317216a..4b9610ac0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HapmapPoolAllelicInfoWalker.java @@ -77,7 +77,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker> matchingQualityNQSPairs = new ArrayList>(); ArrayList> mismatchingQualityNQSPairs = new ArrayList>(); if ( (Variation) tracker.lookup("dbsnp",null) == null ) { - for ( int r = 0; r < context.numReads(); r ++ ) { + for ( int r = 0; r < context.size(); r ++ ) { SAMRecord read = context.getReads().get(r); int offset = context.getOffsets().get(r); int quality = read.getBaseQualities()[offset]; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java index 3a1ef76ce..08f0b09a1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java @@ -69,7 +69,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker impleme public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case - ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets()); + ReadBackedPileup pileup = context.getPileup(); double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(), DiploidGenotypePriors.HUMAN_HETEROZYGOSITY, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java index 14b7019bd..f2853a965 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java @@ -111,7 +111,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker { } public double calculatePowerAtFrequency( AlignmentContext context, int alleles ) { - return theoreticalPower( context.numReads(), getMeanQ(context), alleles, lodThresh ); + return theoreticalPower( context.size(), getMeanQ(context), alleles, lodThresh ); } public byte getMeanQ( AlignmentContext context ) { @@ -126,7 +126,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker { } public double expectedMatchRate(AlignmentContext context) { - int nReads = context.numReads(); + int nReads = context.size(); double matches = 0.0; for ( int r = 0; r < nReads; r ++ ) { matches += QualityUtils.qualToProb(context.getReads().get(r).getBaseQualities()[context.getOffsets().get(r)]); diff --git a/java/src/org/broadinstitute/sting/utils/BasicPileup.java b/java/src/org/broadinstitute/sting/utils/BasicPileup.java index 159082f43..c197b4d1b 100755 --- a/java/src/org/broadinstitute/sting/utils/BasicPileup.java +++ b/java/src/org/broadinstitute/sting/utils/BasicPileup.java @@ -18,8 +18,11 @@ import java.util.Random; abstract public class BasicPileup { public static final char DELETION_CHAR = 'D'; + @Deprecated abstract GenomeLoc getLocation(); + @Deprecated abstract char getRef(); + @Deprecated abstract int size(); @@ -28,6 +31,7 @@ abstract public class BasicPileup { * * @return */ + @Deprecated byte[] getBases() { return null; } /** @@ -35,6 +39,7 @@ abstract public class BasicPileup { * * @return */ + @Deprecated byte[] getQuals() { return null; } /** @@ -57,50 +62,6 @@ abstract public class BasicPileup { return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString()); } - public static String basePileupAsString( List reads, List offsets ) { - StringBuilder bases = new StringBuilder(); - for ( byte base : getBasesAsArrayList(reads, offsets)) { - bases.append((char)base); - } - return bases.toString(); - } - - public static String baseWithStrandPileupAsString( List reads, List offsets ) { - StringBuilder bases = new StringBuilder(); - - for ( int i = 0; i < reads.size(); i++ ) { - SAMRecord read = reads.get(i); - int offset = offsets.get(i); - - char base; - if ( offset == -1 ) { - base = DELETION_CHAR; - } else { - base = (char) read.getReadBases()[offset]; - } - - base = Character.toUpperCase(base); - if (read.getReadNegativeStrandFlag()) { - base = Character.toLowerCase(base); - } - - bases.append(base); - } - - return bases.toString(); - } - - // - // byte[] methods - // - public static byte[] getBases( List reads, List offsets ) { - return getBasesAsArray(reads,offsets); - } - - public static byte[] getQuals( List reads, List offsets ) { - return getQualsAsArray( reads, offsets ); - } - // // ArrayList methods // @@ -119,7 +80,7 @@ abstract public class BasicPileup { return array; } - + @Deprecated public static ArrayList getBasesAsArrayList( List reads, List offsets ) { ArrayList bases = new ArrayList(reads.size()); for (byte value : getBasesAsArray(reads, offsets)) @@ -127,6 +88,7 @@ abstract public class BasicPileup { return bases; } + @Deprecated public static ArrayList getQualsAsArrayList( List reads, List offsets ) { ArrayList quals = new ArrayList(reads.size()); for (byte value : getQualsAsArray(reads, offsets)) @@ -134,6 +96,7 @@ abstract public class BasicPileup { return quals; } + @Deprecated public static byte[] getQualsAsArray( List reads, List offsets ) { byte array[] = new byte[reads.size()]; int index = 0; @@ -151,6 +114,7 @@ abstract public class BasicPileup { return array; } + @Deprecated public static ArrayList mappingQualPileup( List reads) { ArrayList quals = new ArrayList(reads.size()); for ( int i = 0; i < reads.size(); i++ ) { @@ -161,73 +125,6 @@ abstract public class BasicPileup { return quals; } - public static String mappingQualPileupAsString( List reads) { - return quals2String(mappingQualPileup(reads)); - } - - public static String quals2String( List quals ) { - StringBuilder qualStr = new StringBuilder(); - for ( int qual : quals ) { - qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea - char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63 - qualStr.append(qualChar); - } - - return qualStr.toString(); - } - - public static String qualPileupAsString( List reads, List offsets ) { - return quals2String(getQualsAsArrayList(reads, offsets)); - } - - - public static ArrayList getSecondaryBasesAsArrayList( List reads, List offsets ) { - ArrayList bases2 = new ArrayList(reads.size()); - boolean hasAtLeastOneSQorE2Field = false; - - for ( int i = 0; i < reads.size(); i++ ) { - SAMRecord read = reads.get(i); - int offset = offsets.get(i); - byte base2 = BaseUtils.getSecondBase(read, offset); - hasAtLeastOneSQorE2Field = hasAtLeastOneSQorE2Field || BaseUtils.simpleBaseToBaseIndex((char)base2) != -1; - bases2.add(base2); - } - - return (hasAtLeastOneSQorE2Field ? bases2 : null); - } - - public static String getSecondaryBasePileupAsString( List reads, List offsets ) { - StringBuilder bases2 = new StringBuilder(); - ArrayList sbases = getSecondaryBasesAsArrayList(reads, offsets); - - if (sbases == null) { return null; } - - ArrayList pbases = getBasesAsArrayList(reads, offsets); - - //Random generator = new Random(); - - if ( sbases.size() != pbases.size() ) { - throw new StingException("BUG in conversion of secondary bases: primary and secondary base vectors are different sizes!"); - } - - for (int pileupIndex = 0; pileupIndex < sbases.size(); pileupIndex++) { - byte pbase = pbases.get(pileupIndex); - byte sbase = sbases.get(pileupIndex); - - if ( sbase == pbase ) { - throw new StingException("BUG in conversion of secondary bases!"); - } - -// while (sbase == pbase) { // TODO why is here? -// sbase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4)); -// } - - bases2.append((char) sbase); - } - - return bases2.toString(); - } - @Deprecated // todo -- delete me public static String[] indelPileup( List reads, List offsets ) { diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 83a52611c..030612b7a 100755 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -8,9 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; -import java.util.List; -import java.util.ArrayList; -import java.util.Iterator; +import java.util.*; import net.sf.samtools.SAMRecord; @@ -21,26 +19,74 @@ import net.sf.samtools.SAMRecord; */ public class ReadBackedPileup implements Iterable { private GenomeLoc loc = null; - private char ref = 0; private ArrayList pileup = null; + + private int size = 0; // cached value of the size of the pileup + private int nDeletions = 0; // cached value of the number of deletions + private int[] counts = new int[4]; // cached value of counts - public ReadBackedPileup(char ref, AlignmentContext context ) { - this(context.getLocation(), ref, context.getReads(), context.getOffsets()); + /** + * Create a new version of a read backed pileup at loc, using the reads and their corresponding + * offsets. This pileup will contain a list, in order of the reads, of the piled bases at + * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to + * go changing the reads. + * + * @param loc + * @param reads + * @param offsets + */ + public ReadBackedPileup(GenomeLoc loc, List reads, List offsets ) { + this(loc, readsOffsets2Pileup(reads, offsets)); } - public ReadBackedPileup(GenomeLoc loc, char ref, List reads, List offsets ) { - this(loc, ref, readsOffsets2Pileup(reads, offsets)); - } - public ReadBackedPileup(GenomeLoc loc, char ref, ArrayList pileup ) { + /** + * Create a new version of a read backed pileup at loc, using the reads and their corresponding + * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a + * pointer to pileup. Don't go changing the data in pileup. + * + */ + public ReadBackedPileup(GenomeLoc loc, ArrayList pileup ) { if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2"); if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2"); this.loc = loc; - this.ref = ref; this.pileup = pileup; + + calculatedCachedData(); } + /** + * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, + * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling + * sizes, nDeletion, etc. over and over potentially. + */ + private void calculatedCachedData() { + size = 0; + nDeletions = 0; + counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0; + + for ( PileupElement p : this ) { + size++; + if ( p.isDeletion() ) { + nDeletions++; + } else { + int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase()); + if (index == -1) + continue; + counts[index]++; + } + } + } + + + /** + * Helper routine for converting reads and offset lists to a PileupElement list. + * + * @param reads + * @param offsets + * @return + */ private static ArrayList readsOffsets2Pileup(List reads, List offsets ) { if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2"); if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2"); @@ -54,25 +100,19 @@ public class ReadBackedPileup implements Iterable { return pileup; } + // -------------------------------------------------------- // - // iterators + // Special 'constructors' // - public Iterator iterator() { - return pileup.iterator(); - } - - // todo -- reimplement efficiently - public IterableIterator extendedForeachIterator() { - ArrayList x = new ArrayList(size()); - int i = 0; - for ( PileupElement pile : this ) { - x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this)); - } - - return new IterableIterator(x.iterator()); - } - + // -------------------------------------------------------- + /** + * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this + * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy + * of the pileup (just returns this) if there are no deletions in the pileup. + * + * @return + */ public ReadBackedPileup getPileupWithoutDeletions() { // todo -- fixme if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions @@ -85,80 +125,144 @@ public class ReadBackedPileup implements Iterable { newOffsets.add(getOffsets().get(i)); } } - return new ReadBackedPileup(loc, ref, newReads, newOffsets); + return new ReadBackedPileup(loc, newReads, newOffsets); } else { return this; } } - public int getNumberOfDeletions() { - int n = 0; + /** + * Returns a pileup randomly downsampled to the desiredCoverage. + * + * @param desiredCoverage + * @return + */ + public ReadBackedPileup getDownsampledPileup(int desiredCoverage) { + if ( size() <= desiredCoverage ) + return this; - for ( int i = 0; i < size(); i++ ) { - if ( getOffsets().get(i) != -1 ) { n++; } + // randomly choose numbers corresponding to positions in the reads list + Random generator = new Random(); + TreeSet positions = new TreeSet(); + for ( int i = 0; i < desiredCoverage; /* no update */ ) { + if ( positions.add(generator.nextInt(pileup.size())) ) + i++; } - return n; + Iterator positionIter = positions.iterator(); + ArrayList downsampledPileup = new ArrayList(); + + while ( positionIter.hasNext() ) { + int nextReadToKeep = (Integer)positionIter.next(); + downsampledPileup.add(pileup.get(nextReadToKeep)); + } + + return new ReadBackedPileup(getLocation(), downsampledPileup); } + // -------------------------------------------------------- + // + // iterators + // + // -------------------------------------------------------- + + /** + * The best way to access PileupElements where you only care about the bases and quals in the pileup. + * + * for (PileupElement p : this) { doSomething(p); } + * + * Provides efficient iteration of the data. + * + * @return + */ + public Iterator iterator() { + return pileup.iterator(); + } + + + /** + * The best way to access PileupElements where you only care not only about bases and quals in the pileup + * but also need access to the index of the pileup element in the pile. + * + * for (ExtendedPileupElement p : this) { doSomething(p); } + * + * Provides efficient iteration of the data. + * + * @return + */ + + // todo -- reimplement efficiently + public IterableIterator extendedForeachIterator() { + ArrayList x = new ArrayList(size()); + int i = 0; + for ( PileupElement pile : this ) { + x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this)); + } + + return new IterableIterator(x.iterator()); + } + + /** + * Simple useful routine to count the number of deletion bases in this pileup + * + * @return + */ + public int getNumberOfDeletions() { + return nDeletions; + } + +// public int getNumberOfDeletions() { +// int n = 0; +// +// for ( int i = 0; i < size(); i++ ) { +// if ( getOffsets().get(i) != -1 ) { n++; } +// } +// +// return n; +// } + // todo -- optimize me + /** + * @return the number of elements in this pileup + */ public int size() { - return pileup.size(); + return size; + //return pileup.size(); } + /** + * @return the location of this pileup + */ public GenomeLoc getLocation() { return loc; } - public char getRef() { - return ref; - } - - public List getReads() { - List reads = new ArrayList(size()); - for ( PileupElement pile : this ) { reads.add(pile.getRead()); } - return reads; - } - - public List getOffsets() { - List offsets = new ArrayList(size()); - for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } - return offsets; - } - - public byte[] getBases() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); } - return v; - } - - public byte[] getSecondaryBases() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); } - return v; - } - - public byte[] getQuals() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); } - return v; - } - + /** + * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according + * to BaseUtils.simpleBaseToBaseIndex for each base. + * + * @return + */ public int[] getBaseCounts() { - int[] counts = new int[4]; - for ( PileupElement pile : this ) { - // skip deletion sites - if ( ! pile.isDeletion() ) { - char base = Character.toUpperCase((char)(pile.getBase())); - if (BaseUtils.simpleBaseToBaseIndex(base) == -1) - continue; - counts[BaseUtils.simpleBaseToBaseIndex(base)]++; - } - } - return counts; +// int[] counts = new int[4]; +// for ( PileupElement pile : this ) { +// // skip deletion sites +// if ( ! pile.isDeletion() ) { +// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase()); +// if (index == -1) +// continue; +// counts[index]++; +// } +// } +// +// return counts; } + /** + * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated + * @return + */ public boolean hasSecondaryBases() { for ( PileupElement pile : this ) { // skip deletion sites @@ -169,14 +273,69 @@ public class ReadBackedPileup implements Iterable { return false; } - public String getPileupString(boolean qualsAsInts) { + public String getPileupString(char ref, boolean qualsAsInts) { // In the pileup format, each line represents a genomic position, consisting of chromosome name, // coordinate, reference base, read bases, read qualities and alignment mapping qualities. - - //return String.format("%s %s %s %s", getLocation(), getRef(), getBases(), getQuals()); return String.format("%s %s %s %s", getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - getRef(), // reference base + ref, // reference base new String(getBases())); } + + + // -------------------------------------------------------- + // + // Convenience functions that may be slow + // + // -------------------------------------------------------- + + /** + * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time + * @return + */ + public List getReads() { + List reads = new ArrayList(size()); + for ( PileupElement pile : this ) { reads.add(pile.getRead()); } + return reads; + } + + /** + * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time + * @return + */ + public List getOffsets() { + List offsets = new ArrayList(size()); + for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } + return offsets; + } + + /** + * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time + * @return + */ + public byte[] getBases() { + byte[] v = new byte[size()]; + for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); } + return v; + } + + /** + * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time + * @return + */ + public byte[] getSecondaryBases() { + byte[] v = new byte[size()]; + for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); } + return v; + } + + /** + * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time + * @return + */ + public byte[] getQuals() { + byte[] v = new byte[size()]; + for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); } + return v; + } }