diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 871d9d514..3651433ab 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; -import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup; + import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import java.util.*; @@ -81,7 +81,7 @@ public class AlignmentContext { if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context"); this.loc = loc; - this.basePileup = new UnifiedReadBackedPileup(loc, reads, offsets); + this.basePileup = new ReadBackedPileupImpl(loc, reads, offsets); this.skippedBases = skippedBases; } diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java index 9df20b3bf..e14fec436 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java @@ -38,7 +38,7 @@ import java.util.*; * User: ebanks * Modified: chartl (split by read group) */ -public class StratifiedAlignmentContext { +public class StratifiedAlignmentContext { // Definitions: // COMPLETE = full alignment context @@ -55,7 +55,7 @@ public class StratifiedAlignmentContext { // private ArrayList[] reads = new ArrayList[StratifiedContextType.values().length]; // private ArrayList[] offsets = new ArrayList[StratifiedContextType.values().length]; - private ArrayList[] pileupElems = new ArrayList[StratifiedContextType.values().length]; + private ArrayList[] pileupElems = new ArrayList[StratifiedContextType.values().length]; // // accessors // @@ -63,7 +63,7 @@ public class StratifiedAlignmentContext { // public ArrayList getReads(StratifiedContextType type) { return reads[type.ordinal()]; } // public ArrayList getOffsets(StratifiedContextType type) { return offsets[type.ordinal()]; } - public ArrayList getPileupElements(StratifiedContextType type) { + public ArrayList getPileupElements(StratifiedContextType type) { return pileupElems[type.ordinal()]; } @@ -81,8 +81,8 @@ public class StratifiedAlignmentContext { this.loc = loc; this.isExtended = isExtended; for ( int i = 0; i < StratifiedContextType.values().length; i++) { - if ( isExtended ) pileupElems[i] = new ArrayList(); - else pileupElems[i] = new ArrayList(); + if ( isExtended ) pileupElems[i] = new ArrayList(); + else pileupElems[i] = new ArrayList(); } } @@ -90,9 +90,9 @@ public class StratifiedAlignmentContext { int index = type.ordinal(); if ( contexts[index] == null ) { if ( isExtended ) { - contexts[index] = new AlignmentContext(loc , new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList)((ArrayList)getPileupElements(type)))); + contexts[index] = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc,(List)getPileupElements(type))); } else { - contexts[index] = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, getPileupElements(type))); + contexts[index] = new AlignmentContext(loc, new ReadBackedPileupImpl(loc,(List)getPileupElements(type))); } } return contexts[index]; @@ -101,14 +101,14 @@ public class StratifiedAlignmentContext { public void add(SAMRecord read, int offset) { if ( isExtended ) throw new StingException("Can not add read/offset without event type specified to the context holding extended events"); if ( read.getReadNegativeStrandFlag() ) { - pileupElems[StratifiedContextType.REVERSE.ordinal()].add(new PileupElement(read,offset)); + pileupElems[StratifiedContextType.REVERSE.ordinal()].add((PE)new PileupElement(read,offset)); } else { - pileupElems[StratifiedContextType.FORWARD.ordinal()].add(new PileupElement(read,offset)); + pileupElems[StratifiedContextType.FORWARD.ordinal()].add((PE)new PileupElement(read,offset)); } - pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(new PileupElement(read,offset)); + pileupElems[StratifiedContextType.COMPLETE.ordinal()].add((PE)new PileupElement(read,offset)); } - public void add(PileupElement p) { + public void add(PE p) { // if ( isExtended ) throw new StingException("Can not add simple pileup element to the context holding extended events"); SAMRecord read = p.getRead(); if ( read.getReadNegativeStrandFlag() ) { @@ -122,11 +122,11 @@ public class StratifiedAlignmentContext { public void add(SAMRecord read, int offset, int length, byte [] bases) { if ( ! isExtended ) throw new StingException("Can not add read/offset with event type specified to the context holding simple events"); if ( read.getReadNegativeStrandFlag() ) { - pileupElems[StratifiedContextType.REVERSE.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases)); + pileupElems[StratifiedContextType.REVERSE.ordinal()].add((PE)new ExtendedEventPileupElement(read,offset,length,bases)); } else { - pileupElems[StratifiedContextType.FORWARD.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases)); + pileupElems[StratifiedContextType.FORWARD.ordinal()].add((PE)new ExtendedEventPileupElement(read,offset,length,bases)); } - pileupElems[StratifiedContextType.COMPLETE.ordinal()].add(new ExtendedEventPileupElement(read,offset,length,bases)); + pileupElems[StratifiedContextType.COMPLETE.ordinal()].add((PE)new ExtendedEventPileupElement(read,offset,length,bases)); } // public void add(ExtendedEventPileupElement p) { @@ -276,8 +276,8 @@ public class StratifiedAlignmentContext { return contexts; } - public static AlignmentContext joinContexts(Collection contexts, StratifiedContextType type) { - ArrayList pe = new ArrayList(); + public static AlignmentContext joinContexts(Collection contexts, StratifiedContextType type) { + ArrayList pe = new ArrayList(); if ( contexts.size() == 0 ) throw new StingException("BUG: joinContexts requires at least one context to join"); @@ -300,7 +300,7 @@ public class StratifiedAlignmentContext { // dirty trick below. generics do not allow to cast pe (ArrayList) directly to ArrayList, // so we first cast to "? extends" wildcard, then to what we actually need. - if ( isExtended ) return new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, (ArrayList< ExtendedEventPileupElement>)((ArrayList)pe)) ); - else return new AlignmentContext(loc, new UnifiedReadBackedPileup(loc,pe)); + if ( isExtended ) return new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc,(List)pe) ); + else return new AlignmentContext(loc, new ReadBackedPileupImpl(loc,(List)pe)); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 072ad3ab0..75e293374 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -33,8 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.MergingIterator; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.gatk.iterators.LocusOverflowTracker; import java.util.*; @@ -137,7 +136,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { // calculate the number of skipped bases, and update lastLoc so we can do that again in the next() long skippedBases = getSkippedBases( rodSite ); lastLoc = site; - return new AlignmentContext(site, new UnifiedReadBackedPileup(site), skippedBases); + return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases); } public LocusOverflowTracker getLocusOverflowTracker() { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java index 68e7af0d2..4e866a136 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/DownsamplingLocusIteratorByState.java @@ -359,7 +359,7 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { // In this case, the subsequent call to next() will emit the normal pileup at the current base // and shift the position. if (readInfo.generateExtendedEvents() && hasExtendedEvents) { - Map fullExtendedEventPileup = new HashMap(); + Map> fullExtendedEventPileup = new HashMap>(); SAMRecordState our1stState = readStates.getFirst(); // get current location on the reference and decrement it by 1: the indels we just stepped over @@ -412,16 +412,16 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { nMQ0Reads++; } // TODO: sample split! - if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads)); + if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nDeletions,nInsertions,nMQ0Reads)); } } hasExtendedEvents = false; // we are done with extended events prior to current ref base // System.out.println("Indel(s) at "+loc); // for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); } - nextAlignmentContext = new AlignmentContext(loc, new SampleSplitReadBackedExtendedEventPileup(loc, fullExtendedEventPileup)); + nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup)); } else { GenomeLoc location = getLocation(); - Map fullPileup = new HashMap(); + Map> fullPileup = new HashMap>(); // todo -- performance problem -- should be lazy, really for(String sampleName: sampleNames) { @@ -456,12 +456,12 @@ public class DownsamplingLocusIteratorByState extends LocusIterator { nMQ0Reads++; } } - if( pile.size() != 0 ) fullPileup.put(sampleName,new UnifiedReadBackedPileup(location,pile,size,nDeletions,nMQ0Reads)); + if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads)); } updateReadStates(); // critical - must be called after we get the current state offsets and location // if we got reads with non-D/N over the current position, we are done - if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new SampleSplitReadBackedPileup(location, fullPileup)); + if ( !fullPileup.isEmpty() ) nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location,fullPileup)); } } } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index e0a4e59d5..d608efe10 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -396,7 +396,7 @@ public class LocusIteratorByState extends LocusIterator { GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1); // System.out.println("Indel(s) at "+loc); // for ( ExtendedEventPileupElement pe : indelPile ) { if ( pe.isIndel() ) System.out.println(" "+pe.toString()); } - nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedExtendedEventPileup(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads)); + nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads)); } else { ArrayList pile = new ArrayList(readStates.size()); @@ -427,7 +427,7 @@ public class LocusIteratorByState extends LocusIterator { GenomeLoc loc = getLocation(); updateReadStates(); // critical - must be called after we get the current state offsets and location // if we got reads with non-D/N over the current position, we are done - if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new UnifiedReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads)); + if ( pile.size() != 0 ) nextAlignmentContext = new AlignmentContext(loc, new ReadBackedPileupImpl(loc, pile, size, nDeletions, nMQ0Reads)); } } } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index b2cc34b0b..199daf911 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -36,8 +36,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.util.*; @@ -184,7 +183,7 @@ public class TraverseDuplicates extends TraversalEngine extends TraversalEngine,Locu long nSkipped = rodLocusView.getLastSkippedBases(); if ( nSkipped > 0 ) { GenomeLoc site = rodLocusView.getLocOneBeyondShard(); - AlignmentContext ac = new AlignmentContext(site, new UnifiedReadBackedPileup(site), nSkipped); + AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped); M x = walker.map(null, null, ac); sum = walker.reduce(x, sum); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index fd92fb108..d660e901f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -68,7 +68,7 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, ExperimentalA countsBySize.put(al.length(),0); } - for ( ExtendedEventPileupElement e : pileup ) { + for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { if ( countsBySize.keySet().contains(e.getEventLength()) ) { // if proper length if ( e.isDeletion() && vc.isDeletion() || e.isInsertion() && vc.isInsertion() ) { countsBySize.put(e.getEventLength(),countsBySize.get(e.getEventLength())+1); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 41641e4ef..159da7025 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -34,10 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement; -import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.*; import java.util.*; import net.sf.samtools.SAMRecord; @@ -266,7 +263,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { } } - return new UnifiedReadBackedPileup(pileup.getLocation(), reads, offsets); + return new ReadBackedPileupImpl(pileup.getLocation(), reads, offsets); } private static ReadBackedPileup getPileupOfAllele( Allele allele, ReadBackedPileup pileup ) { @@ -279,7 +276,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { } } - return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup); + return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup); } public List getKeyNames() { return Arrays.asList("HaplotypeScore"); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java index bc27aeec9..6aed600b5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java @@ -38,7 +38,7 @@ import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broad.tribble.vcf.VCFHeaderLine; import java.util.*; @@ -159,7 +159,7 @@ public class BatchedCallsMerger extends LocusWalker imp if ( readGroup != null && samples.contains(readGroup.getSample()) ) newPileup.add(p); } - return new AlignmentContext(pileup.getLocation(), new UnifiedReadBackedPileup(pileup.getLocation(), newPileup)); + return new AlignmentContext(pileup.getLocation(), new ReadBackedPileupImpl(pileup.getLocation(), newPileup)); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 2ffb2b36e..9f81c516d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -238,19 +238,19 @@ public class UnifiedGenotyperEngine { AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES ) filteredPileup.add(p); } - return new UnifiedReadBackedPileup(pileup.getLocation(), filteredPileup); + return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup); } // filter based on maximum mismatches and bad mates private ReadBackedExtendedEventPileup filterPileup(ReadBackedExtendedEventPileup pileup, ReferenceContext refContext) { ArrayList filteredPileup = new ArrayList(); - for ( ExtendedEventPileupElement p : pileup ) { + for ( ExtendedEventPileupElement p : pileup.toExtendedIterable() ) { if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) && AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES ) filteredPileup.add(p); } - return new UnifiedReadBackedExtendedEventPileup(pileup.getLocation(), filteredPileup); + return new ReadBackedExtendedEventPileupImpl(pileup.getLocation(), filteredPileup); } private static boolean isValidDeletionFraction(double d) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index fedc39efc..24cc8790b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -91,7 +91,7 @@ public class RealignerTargetCreator extends LocusWalker 0 ) { hasIndel = hasInsertion = true; // check the ends of the reads to see how far they extend - for (ExtendedEventPileupElement p : pileup ) + for (ExtendedEventPileupElement p : pileup.toExtendedIterable() ) furthestStopPos = Math.max(furthestStopPos, p.getRead().getAlignmentEnd()); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java index e413b66d6..ffe6b983a 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java @@ -86,7 +86,7 @@ public class IndelErrorRateWalker extends LocusWalker { } private void countIndels(ReadBackedExtendedEventPileup p) { - for ( ExtendedEventPileupElement pe : p ) { + for ( ExtendedEventPileupElement pe : p.toExtendedIterable() ) { if ( ! pe.isIndel() ) continue; if ( pe.getEventLength() > MAX_LENGTH ) continue; if ( pe.isInsertion() ) insCounts[pe.getEventLength()-1]++; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index c4a102817..038097d93 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -529,7 +529,7 @@ public class MendelianViolationClassifier extends LocusWalker context) { int numParental = 0; int total = 0; if ( context != null ) { diff --git a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index f40b1051b..85e131c1c 100644 --- a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.UnifiedReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.util.List; import java.util.Arrays; @@ -115,7 +115,7 @@ public class DupUtils { private static Pair combineBaseProbs(List duplicates, int readOffset, int maxQScore) { GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0)); - ReadBackedPileup pileup = new UnifiedReadBackedPileup(loc, duplicates, readOffset); + ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset); final boolean debug = false; diff --git a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java new file mode 100644 index 000000000..b8dd2ed36 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.pileup; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.gatk.iterators.IterableIterator; + +import java.util.*; + +import net.sf.samtools.SAMRecord; + +/** + * A generic implementation of read-backed pileups. + * + * @author mhanna + * @version 0.1 + */ +public abstract class AbstractReadBackedPileup implements ReadBackedPileup { + protected final GenomeLoc loc; + protected final PileupElementTracker pileupElementTracker; + + protected int size = 0; // cached value of the size of the pileup + protected int nDeletions = 0; // cached value of the number of deletions + protected int nMQ0Reads = 0; // cached value of the number of MQ0 reads + + /** + * Create a new version of a read backed pileup at loc, using the reads and their corresponding + * offsets. This pileup will contain a list, in order of the reads, of the piled bases at + * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to + * go changing the reads. + * + * @param loc + * @param reads + * @param offsets + */ + public AbstractReadBackedPileup(GenomeLoc loc, List reads, List offsets ) { + this.loc = loc; + this.pileupElementTracker = readsOffsets2Pileup(reads,offsets); + } + + public AbstractReadBackedPileup(GenomeLoc loc, List reads, int offset ) { + this.loc = loc; + this.pileupElementTracker = readsOffsets2Pileup(reads,offset); + } + + /** + * Create a new version of a read backed pileup at loc without any aligned reads + * + */ + public AbstractReadBackedPileup(GenomeLoc loc) { + this(loc, new UnifiedPileupElementTracker()); + } + + /** + * Create a new version of a read backed pileup at loc, using the reads and their corresponding + * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a + * pointer to pileup. Don't go changing the data in pileup. + * + */ + public AbstractReadBackedPileup(GenomeLoc loc, List pileup ) { + if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup"); + if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup"); + + this.loc = loc; + this.pileupElementTracker = new UnifiedPileupElementTracker(pileup); + calculateCachedData(); + } + + /** + * Optimization of above constructor where all of the cached data is provided + * @param loc + * @param pileup + */ + public AbstractReadBackedPileup(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads ) { + if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup"); + if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup"); + + this.loc = loc; + this.pileupElementTracker = new UnifiedPileupElementTracker(pileup); + this.size = size; + this.nDeletions = nDeletions; + this.nMQ0Reads = nMQ0Reads; + } + + + protected AbstractReadBackedPileup(GenomeLoc loc, PileupElementTracker tracker) { + this.loc = loc; + this.pileupElementTracker = tracker; + calculateCachedData(); + } + + /** + * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, + * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling + * sizes, nDeletion, etc. over and over potentially. + */ + protected void calculateCachedData() { + size = 0; + nDeletions = 0; + nMQ0Reads = 0; + + for ( PileupElement p : pileupElementTracker ) { + size++; + if ( p.isDeletion() ) { + nDeletions++; + } + if ( p.getRead().getMappingQuality() == 0 ) { + nMQ0Reads++; + } + } + } + + /** + * Helper routine for converting reads and offset lists to a PileupElement list. + * + * @param reads + * @param offsets + * @return + */ + private PileupElementTracker readsOffsets2Pileup(List reads, List offsets ) { + if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup"); + if ( offsets == null ) throw new StingException("Illegal null offsets list in UnifiedReadBackedPileup"); + if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!"); + + UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); + for ( int i = 0; i < reads.size(); i++ ) { + pileup.add(createNewPileupElement(reads.get(i),offsets.get(i))); + } + + return pileup; + } + + /** + * Helper routine for converting reads and a single offset to a PileupElement list. + * + * @param reads + * @param offset + * @return + */ + private PileupElementTracker readsOffsets2Pileup(List reads, int offset ) { + if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup"); + if ( offset < 0 ) throw new StingException("Illegal offset < 0 UnifiedReadBackedPileup"); + + UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); + for ( int i = 0; i < reads.size(); i++ ) { + pileup.add(createNewPileupElement( reads.get(i), offset )); + } + + return pileup; + } + + protected abstract RBP createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker); + protected abstract PE createNewPileupElement(SAMRecord read, int offset); + + // -------------------------------------------------------- + // + // Special 'constructors' + // + // -------------------------------------------------------- + + /** + * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this + * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy + * of the pileup (just returns this) if there are no deletions in the pileup. + * + * @return + */ + @Override + public RBP getPileupWithoutDeletions() { + if ( getNumberOfDeletions() > 0 ) { + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for(String sampleName: tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sampleName); + AbstractReadBackedPileup pileup = (AbstractReadBackedPileup)createNewPileup(loc,perSampleElements).getPileupWithoutDeletions(); + filteredTracker.addElements(sampleName,pileup.pileupElementTracker); + } + return createNewPileup(loc,filteredTracker); + + } + else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for ( PE p : tracker ) { + if ( !p.isDeletion() ) { + filteredTracker.add(p); + } + } + return createNewPileup(loc, filteredTracker); + } + } else { + return (RBP)this; + } + } + + /** + * Returns a new ReadBackedPileup where only one read from an overlapping read + * pair is retained. If the two reads in question disagree to their basecall, + * neither read is retained. If they agree on the base, the read with the higher + * quality observation is retained + * + * @return the newly filtered pileup + */ + @Override + public RBP getOverlappingFragmentFilteredPileup() { + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for(String sampleName: tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sampleName); + AbstractReadBackedPileup pileup = (AbstractReadBackedPileup)createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup(); + filteredTracker.addElements(sampleName,pileup.pileupElementTracker); + } + return createNewPileup(loc,filteredTracker); + } + else { + Map filteredPileup = new HashMap(); + + for ( PE p : pileupElementTracker ) { + String readName = p.getRead().getReadName(); + + // if we've never seen this read before, life is good + if (!filteredPileup.containsKey(readName)) { + filteredPileup.put(readName, p); + } else { + PileupElement existing = filteredPileup.get(readName); + + // if the reads disagree at this position, throw them both out. Otherwise + // keep the element with the higher quality score + if (existing.getBase() != p.getBase()) { + filteredPileup.remove(readName); + } else { + if (existing.getQual() < p.getQual()) { + filteredPileup.put(readName, p); + } + } + } + } + + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for(PE filteredElement: filteredPileup.values()) + filteredTracker.add(filteredElement); + + return createNewPileup(loc,filteredTracker); + } + } + + + /** + * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this + * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy + * of the pileup (just returns this) if there are no MQ0 reads in the pileup. + * + * @return + */ + @Override + public RBP getPileupWithoutMappingQualityZeroReads() { + if ( getNumberOfMappingQualityZeroReads() > 0 ) { + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for(String sampleName: tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sampleName); + AbstractReadBackedPileup pileup = (AbstractReadBackedPileup)createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads(); + filteredTracker.addElements(sampleName,pileup.pileupElementTracker); + } + return createNewPileup(loc,filteredTracker); + + } + else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for ( PE p : tracker ) { + if ( p.getRead().getMappingQuality() > 0 ) { + filteredTracker.add(p); + } + } + return createNewPileup(loc, filteredTracker); + } + } else { + return (RBP)this; + } + } + + /** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from + * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. + * @param minBaseQ + * @param minMapQ + * @return + */ + @Override + public RBP getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) { + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for(String sampleName: tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sampleName); + AbstractReadBackedPileup pileup = (AbstractReadBackedPileup)createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ); + filteredTracker.addElements(sampleName,pileup.pileupElementTracker); + } + + return createNewPileup(loc,filteredTracker); + } + else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for ( PE p : pileupElementTracker ) { + if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) { + filteredTracker.add(p); + } + } + + return createNewPileup(loc, filteredTracker); + } + } + + /** Returns subset of this pileup that contains only bases with quality >= minBaseQ. + * This method allocates and returns a new instance of ReadBackedPileup. + * @param minBaseQ + * @return + */ + @Override + public RBP getBaseFilteredPileup( int minBaseQ ) { + return getBaseAndMappingFilteredPileup(minBaseQ, -1); + } + + /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. + * This method allocates and returns a new instance of ReadBackedPileup. + * @param minMapQ + * @return + */ + @Override + public RBP getMappingFilteredPileup( int minMapQ ) { + return getBaseAndMappingFilteredPileup(-1, minMapQ); + } + + public Collection getSamples() { + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + return tracker.getSamples(); + } + else { + Collection sampleNames = new HashSet(); + for(PileupElement p: this) { + SAMRecord read = p.getRead(); + String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; + sampleNames.add(sampleName); + } + return sampleNames; + } + } + + /** + * Returns a pileup randomly downsampled to the desiredCoverage. + * + * @param desiredCoverage + * @return + */ + @Override + public RBP getDownsampledPileup(int desiredCoverage) { + if ( size() <= desiredCoverage ) + return (RBP)this; + + // randomly choose numbers corresponding to positions in the reads list + Random generator = new Random(); + TreeSet positions = new TreeSet(); + for ( int i = 0; i < desiredCoverage; /* no update */ ) { + if ( positions.add(generator.nextInt(size)) ) + i++; + } + + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + int current = 0; + + for(String sampleName: tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sampleName); + + List filteredPileup = new ArrayList(); + for(PileupElement p: perSampleElements) { + if(positions.contains(current)) + filteredPileup.add(p); + } + + if(!filteredPileup.isEmpty()) { + AbstractReadBackedPileup pileup = (AbstractReadBackedPileup)createNewPileup(loc,perSampleElements); + filteredTracker.addElements(sampleName,pileup.pileupElementTracker); + } + + current++; + } + + return createNewPileup(loc,filteredTracker); + } + else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker)pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + Iterator positionIter = positions.iterator(); + + while ( positionIter.hasNext() ) { + int nextReadToKeep = (Integer)positionIter.next(); + filteredTracker.add(tracker.get(nextReadToKeep)); + } + + return createNewPileup(getLocation(), filteredTracker); + } + } + + @Override + public RBP getPileupForSample(String sampleName) { + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + return createNewPileup(loc,tracker.getElements(sampleName)); + } + else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for(PE p: pileupElementTracker) { + SAMRecord read = p.getRead(); + if(sampleName != null) { + if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) + filteredTracker.add(p); + } + else { + if(read.getReadGroup() == null || read.getReadGroup().getSample() == null) + filteredTracker.add(p); + } + } + return filteredTracker.size()>0 ? createNewPileup(loc,filteredTracker) : null; + } + } + + // -------------------------------------------------------- + // + // iterators + // + // -------------------------------------------------------- + + /** + * The best way to access PileupElements where you only care about the bases and quals in the pileup. + * + * for (PileupElement p : this) { doSomething(p); } + * + * Provides efficient iteration of the data. + * + * @return + */ + @Override + public Iterator iterator() { + return new Iterator() { + private final Iterator wrappedIterator = pileupElementTracker.iterator(); + + public boolean hasNext() { return wrappedIterator.hasNext(); } + public PileupElement next() { return wrappedIterator.next(); } + public void remove() { throw new UnsupportedOperationException("Cannot remove from a pileup element iterator"); } + }; + } + + /** + * The best way to access PileupElements where you only care not only about bases and quals in the pileup + * but also need access to the index of the pileup element in the pile. + * + * for (ExtendedPileupElement p : this) { doSomething(p); } + * + * Provides efficient iteration of the data. + * + * @return + */ + + // todo -- reimplement efficiently + // todo -- why is this public? + @Override + public IterableIterator extendedForeachIterator() { + ArrayList x = new ArrayList(size()); + int i = 0; + for ( PileupElement pile : this ) { + x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this)); + } + + return new IterableIterator(x.iterator()); + } + + /** + * Simple useful routine to count the number of deletion bases in this pileup + * + * @return + */ + @Override + public int getNumberOfDeletions() { + return nDeletions; + } + + @Override + public int getNumberOfMappingQualityZeroReads() { + return nMQ0Reads; + } + + /** + * @return the number of elements in this pileup + */ + @Override + public int size() { + return size; + } + + /** + * @return the location of this pileup + */ + @Override + public GenomeLoc getLocation() { + return loc; + } + + /** + * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated + * @return + */ + @Override + public boolean hasSecondaryBases() { + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + boolean hasSecondaryBases = false; + + for(String sampleName: tracker.getSamples()) { + hasSecondaryBases |= createNewPileup(loc,tracker.getElements(sampleName)).hasSecondaryBases(); + } + + return hasSecondaryBases; + } + else { + for ( PileupElement pile : this ) { + // skip deletion sites + if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) ) + return true; + } + } + + return false; + } + + /** + * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according + * to BaseUtils.simpleBaseToBaseIndex for each base. + * + * @return + */ + @Override + public int[] getBaseCounts() { + int[] counts = new int[4]; + + if(pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker)pileupElementTracker; + for(String sampleName: tracker.getSamples()) { + int[] countsBySample = createNewPileup(loc,tracker.getElements(sampleName)).getBaseCounts(); + for(int i = 0; i < counts.length; i++) + counts[i] += countsBySample[i]; + } + } + else { + for ( PileupElement pile : this ) { + // skip deletion sites + if ( ! pile.isDeletion() ) { + int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase()); + if (index != -1) + counts[index]++; + } + } + } + + return counts; + } + + @Override + public String getPileupString(Character ref) { + // In the pileup format, each line represents a genomic position, consisting of chromosome name, + // coordinate, reference base, read bases, read qualities and alignment mapping qualities. + return String.format("%s %s %c %s %s", + getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate + ref, // reference base + new String(getBases()), + getQualsString()); + } + + // -------------------------------------------------------- + // + // Convenience functions that may be slow + // + // -------------------------------------------------------- + + /** + * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time + * @return + */ + @Override + public List getReads() { + List reads = new ArrayList(size()); + for ( PileupElement pile : this ) { reads.add(pile.getRead()); } + return reads; + } + + /** + * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time + * @return + */ + @Override + public List getOffsets() { + List offsets = new ArrayList(size()); + for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } + return offsets; + } + + /** + * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time + * @return + */ + @Override + public byte[] getBases() { + byte[] v = new byte[size()]; + int pos = 0; + for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getBase(); } + return v; + } + + /** + * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time + * @return + */ + @Override + public byte[] getSecondaryBases() { + byte[] v = new byte[size()]; + int pos = 0; + for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getSecondBase(); } + return v; + } + + /** + * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time + * @return + */ + @Override + public byte[] getQuals() { + byte[] v = new byte[size()]; + int pos = 0; + for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getQual(); } + return v; + } + + /** + * Get an array of the mapping qualities + * @return + */ + @Override + public byte[] getMappingQuals() { + byte[] v = new byte[size()]; + int pos = 0; + for ( PileupElement pile : pileupElementTracker ) { v[pos++] = (byte)pile.getRead().getMappingQuality(); } + return v; + } + + static String quals2String( byte[] quals ) { + StringBuilder qualStr = new StringBuilder(); + for ( int qual : quals ) { + qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea + char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63 + qualStr.append(qualChar); + } + + return qualStr.toString(); + } + + private String getQualsString() { + return quals2String(getQuals()); + } + +} + diff --git a/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java b/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java index a23a2e3be..d41cecdb9 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/MergingPileupElementIterator.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.pileup; import net.sf.picard.util.PeekableIterator; import java.util.PriorityQueue; -import java.util.Map; import java.util.Comparator; import java.util.Iterator; @@ -37,22 +36,15 @@ import java.util.Iterator; * @author mhanna * @version 0.1 */ -class MergingPileupElementIterator implements Iterator { - private final PriorityQueue> perSampleIterators; +class MergingPileupElementIterator implements Iterator { + private final PriorityQueue> perSampleIterators; - public MergingPileupElementIterator(Map pileupsPerSample) { - perSampleIterators = new PriorityQueue>(pileupsPerSample.size(),new PileupElementIteratorComparator()); - for(Object pileupForSample: pileupsPerSample.values()) { - if(pileupForSample instanceof ReadBackedPileup) { - ReadBackedPileup pileup = (ReadBackedPileup)pileupForSample; - if(pileup.size() != 0) - perSampleIterators.add(new PeekableIterator(pileup.iterator())); - } - else if(pileupForSample instanceof ReadBackedExtendedEventPileup) { - ReadBackedExtendedEventPileup pileup = (ReadBackedExtendedEventPileup)pileupForSample; - if(pileup.size() != 0) - perSampleIterators.add(new PeekableIterator(pileup.iterator())); - } + public MergingPileupElementIterator(PerSamplePileupElementTracker tracker) { + perSampleIterators = new PriorityQueue>(tracker.getSamples().size(),new PileupElementIteratorComparator()); + for(String sampleName: tracker.getSamples()) { + PileupElementTracker trackerPerSample = tracker.getElements(sampleName); + if(trackerPerSample.size() != 0) + perSampleIterators.add(new PeekableIterator(tracker.iterator())); } } @@ -60,9 +52,9 @@ class MergingPileupElementIterator implements Iterator { return !perSampleIterators.isEmpty(); } - public PileupElement next() { - PeekableIterator currentIterator = perSampleIterators.remove(); - PileupElement current = currentIterator.next(); + public PE next() { + PeekableIterator currentIterator = perSampleIterators.remove(); + PE current = currentIterator.next(); if(currentIterator.hasNext()) perSampleIterators.add(currentIterator); return current; @@ -75,8 +67,8 @@ class MergingPileupElementIterator implements Iterator { /** * Compares two peekable iterators consisting of pileup elements. */ - private class PileupElementIteratorComparator implements Comparator> { - public int compare(PeekableIterator lhs, PeekableIterator rhs) { + private class PileupElementIteratorComparator implements Comparator> { + public int compare(PeekableIterator lhs, PeekableIterator rhs) { return rhs.peek().getOffset() - lhs.peek().getOffset(); } } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java b/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java new file mode 100644 index 000000000..047f5b0ee --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.pileup; + +import java.util.*; + +/** + * Javadoc goes here. + * + * @author mhanna + * @version 0.1 + */ +abstract class PileupElementTracker implements Iterable { + public abstract int size(); +} + +class UnifiedPileupElementTracker extends PileupElementTracker { + private final List pileup; + + public UnifiedPileupElementTracker() { pileup = new LinkedList(); } + public UnifiedPileupElementTracker(List pileup) { this.pileup = pileup; } + + public void add(PE element) { + pileup.add(element); + } + + public PE get(int index) { + return pileup.get(index); + } + + public int size() { + return pileup.size(); + } + + public Iterator iterator() { return pileup.iterator(); } +} + +class PerSamplePileupElementTracker extends PileupElementTracker { + private final Map> pileup; + private int size = 0; + + public PerSamplePileupElementTracker() { + pileup = new HashMap>(); + } + + public PerSamplePileupElementTracker(Map> pileupsBySample) { + pileup = new HashMap>(); + for(Map.Entry> entry: pileupsBySample.entrySet()) { + String sampleName = entry.getKey(); + AbstractReadBackedPileup pileupBySample = entry.getValue(); + pileup.put(sampleName,pileupBySample.pileupElementTracker); + } + } + + /** + * Gets a list of all the samples stored in this pileup. + * @return List of samples in this pileup. + */ + public Collection getSamples() { + return pileup.keySet(); + } + + public PileupElementTracker getElements(final String sample) { + return pileup.get(sample); + } + + public void addElements(final String sample, PileupElementTracker elements) { + pileup.put(sample,elements); + size += elements.size(); + } + + public Iterator iterator() { return new MergingPileupElementIterator(this); } + + public int size() { + return size; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java index 7d424bb8c..5bbfaa836 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java @@ -39,7 +39,26 @@ import net.sf.samtools.SAMRecord; * @author mhanna * @version 0.1 */ -public interface ReadBackedExtendedEventPileup extends Iterable { +public interface ReadBackedExtendedEventPileup extends ReadBackedPileup { + /** + * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this + * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy + * of the pileup (just returns this) if there are no deletions in the pileup. + * + * @return + */ + public ReadBackedExtendedEventPileup getPileupWithoutDeletions(); + + /** + * Returns a new ReadBackedPileup where only one read from an overlapping read + * pair is retained. If the two reads in question disagree to their basecall, + * neither read is retained. If they agree on the base, the read with the higher + * quality observation is retained + * + * @return the newly filtered pileup + */ + public ReadBackedExtendedEventPileup getOverlappingFragmentFilteredPileup(); + /** * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy @@ -49,6 +68,26 @@ public interface ReadBackedExtendedEventPileup extends Iterable= minBaseQ, coming from + * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. + * @param minBaseQ + * @param minMapQ + * @return + */ + public ReadBackedExtendedEventPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ); + + /** Returns subset of this pileup that contains only bases with quality >= minBaseQ. + * This method allocates and returns a new instance of ReadBackedPileup. + * @param minBaseQ + * @return + */ + public ReadBackedExtendedEventPileup getBaseFilteredPileup( int minBaseQ ); + + /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. + * This method allocates and returns a new instance of ReadBackedPileup. + * @param minMapQ + * @return + */ public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ); /** @@ -70,7 +109,9 @@ public interface ReadBackedExtendedEventPileup extends Iterable toExtendedIterable(); /** * Returns the number of deletion events in this pileup @@ -110,8 +151,6 @@ public interface ReadBackedExtendedEventPileup extends Iterable> getEventStringsWithCounts(byte[] refBases); + public String getShortPileupString(); + /** * Get an array of the mapping qualities * @return diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java new file mode 100644 index 000000000..b7f1e418a --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.utils.pileup; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.*; + +import net.sf.samtools.SAMRecord; + +public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup implements ReadBackedExtendedEventPileup { + private int nInsertions = 0; + private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site + + public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileupElements) { + super(loc,pileupElements); + } + + public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, PileupElementTracker tracker) { + super(loc,tracker); + } + + /** + * Optimization of above constructor where all of the cached data is provided + * @param loc + * @param pileup + */ + public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, List pileup, int size, + int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) { + super(loc,pileup,size,nDeletions,nMQ0Reads); + this.maxDeletionLength = maxDeletionLength; + this.nInsertions = nInsertions; + } + + public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map> pileupElementsBySample) { + super(loc,new PerSamplePileupElementTracker(pileupElementsBySample)); + } + + /** + * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, + * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling + * sizes, nDeletion, etc. over and over potentially. + */ + @Override + protected void calculateCachedData() { + super.calculateCachedData(); + + nDeletions = 0; + maxDeletionLength = 0; + for ( ExtendedEventPileupElement p : this.toExtendedIterable()) { + if ( p.isDeletion() ) { + nDeletions++; + maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength()); + } + } + } + + @Override + protected ReadBackedExtendedEventPileup createNewPileup(GenomeLoc loc, PileupElementTracker tracker) { + return new ReadBackedExtendedEventPileupImpl(loc,tracker); + } + + @Override + protected ExtendedEventPileupElement createNewPileupElement(SAMRecord read, int offset) { + throw new UnsupportedOperationException("Not enough information provided to create a new pileup element"); + } + + + /** + * Returns the number of insertion events in this pileup + * + * @return + */ + @Override + public int getNumberOfInsertions() { + return nInsertions; + } + + /** Returns the length of the longest deletion observed at the site this + * pileup is associated with (NOTE: by convention, both insertions and deletions + * are associated with genomic location immediately before the actual event). If + * there are no deletions at the site, returns 0. + * @return + */ + @Override + public int getMaxDeletionLength() { + return maxDeletionLength; + } + + public Iterable toExtendedIterable() { + return new Iterable() { + public Iterator iterator() { return pileupElementTracker.iterator(); } + }; + } + + /** + * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time + * @return + */ + @Override + public byte[] getEvents() { + byte[] v = new byte[size()]; + int i = 0; + for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) { + switch ( e.getType() ) { + case INSERTION: v[i] = 'I'; break; + case DELETION: v[i] = 'D'; break; + case NOEVENT: v[i] = '.'; break; + default: throw new StingException("Unknown event type encountered: "+e.getType()); + } + i++; + } + return v; + } + + /** A shortcut for getEventStringsWithCounts(null); + * + * @return + */ + @Override + public List> getEventStringsWithCounts() { + return getEventStringsWithCounts(null); + } + + @Override + public String getShortPileupString() { + // In the pileup format, each extended event line has genomic position (chromosome name and offset), + // reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing + // insertion, deletion or no-event, respectively. + return String.format("%s %s E %s", + getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate + new String(getEvents()) ); + } + + /** Returns String representation of all distinct extended events (indels) at the site along with + * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for + * deletions will be generated as "D" (i.e. "5D"); if the reference bases are provided, the actual + * deleted sequence will be used in the string representation (e.g. "-AAC"). + * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and + * extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+) + * @return list of distinct events; first element of a pair is a string representation of the event, second element + * gives the number of reads, in which that event was observed + */ + @Override + public List> getEventStringsWithCounts(byte[] refBases) { + Map events = new HashMap(); + + for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) { + Integer cnt; + String indel = null; + switch ( e.getType() ) { + case INSERTION: + indel = "+"+e.getEventBases(); + break; + case DELETION: + indel = getDeletionString(e.getEventLength(),refBases); + break; + case NOEVENT: continue; + default: throw new StingException("Unknown event type encountered: "+e.getType()); + } + + cnt = events.get(indel); + if ( cnt == null ) events.put(indel,1); + else events.put(indel,cnt.intValue()+1); + } + + List> eventList = new ArrayList>(events.size()); + for ( Map.Entry m : events.entrySet() ) { + eventList.add( new Pair(m.getKey(),m.getValue())); + } + return eventList; + } + + /** + * Builds string representation of the deletion event. If refBases is null, the representation will be + * "D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC") + * will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted + * base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the + * deletion length (i.e. size of refBase must be at least length+1). + * @param length + * @param refBases + * @return + */ + private String getDeletionString(int length, byte[] refBases) { + if ( refBases == null ) { + return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D" + } else { + return "-"+new String(refBases,1,length).toUpperCase(); + } + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 0a024cb4d..58f09039e 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -145,7 +145,7 @@ public interface ReadBackedPileup extends Iterable { */ public boolean hasSecondaryBases(); - public String getPileupString(char ref); + public String getPileupString(Character ref); /** * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java new file mode 100644 index 000000000..b32fb8f06 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.utils.pileup; + +import org.broadinstitute.sting.utils.GenomeLoc; +import net.sf.samtools.SAMRecord; + +import java.util.List; +import java.util.Map; + +public class ReadBackedPileupImpl extends AbstractReadBackedPileup implements ReadBackedPileup { + + public ReadBackedPileupImpl(GenomeLoc loc) { + super(loc); + } + + public ReadBackedPileupImpl(GenomeLoc loc, List reads, List offsets ) { + super(loc,reads,offsets); + } + + public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset ) { + super(loc,reads,offset); + } + + public ReadBackedPileupImpl(GenomeLoc loc, List pileupElements) { + super(loc,pileupElements); + } + + public ReadBackedPileupImpl(GenomeLoc loc, Map> pileupElementsBySample) { + super(loc,new PerSamplePileupElementTracker(pileupElementsBySample)); + } + + /** + * Optimization of above constructor where all of the cached data is provided + * @param loc + * @param pileup + */ + public ReadBackedPileupImpl(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads ) { + super(loc,pileup,size,nDeletions,nMQ0Reads); + } + + protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker tracker) { + super(loc,tracker); + } + + @Override + protected ReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker tracker) { + return new ReadBackedPileupImpl(loc,tracker); + } + + @Override + protected PileupElement createNewPileupElement(SAMRecord read, int offset) { + return new PileupElement(read,offset); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/pileup/SampleSplitReadBackedExtendedEventPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/SampleSplitReadBackedExtendedEventPileup.java deleted file mode 100644 index c01d203ef..000000000 --- a/java/src/org/broadinstitute/sting/utils/pileup/SampleSplitReadBackedExtendedEventPileup.java +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.pileup; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.collections.Pair; - -import java.util.*; - -import net.sf.samtools.SAMRecord; - -/** - * Extended event pileups, split out by sample. - * - * @author mhanna - * @version 0.1 - */ -public class SampleSplitReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup { - private GenomeLoc loc = null; - private Map pileupBySample = null; - - private int size = 0; // cached value of the size of the pileup - private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site - private int nDeletions = 0; // cached value of the number of deletions - private int nInsertions = 0; - private int nMQ0Reads = 0; // cached value of the number of MQ0 reads - - public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc) { - this(loc,new HashMap()); - } - - /** - * Optimization of above constructor where all of the cached data is provided - * @param loc Location of this pileup. - * @param pileup Pileup data, split by sample name. - */ - public SampleSplitReadBackedExtendedEventPileup(GenomeLoc loc, Map pileup) { - if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup"); - if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup"); - - this.loc = loc; - this.pileupBySample = pileup; - for(ReadBackedExtendedEventPileup pileupBySample: pileup.values()) { - this.size += pileupBySample.size(); - this.nDeletions += pileupBySample.getNumberOfDeletions(); - this.nMQ0Reads += pileupBySample.getNumberOfMappingQualityZeroReads(); - } - } - - - - private void addPileup(final String sampleName, ReadBackedExtendedEventPileup pileup) { - pileupBySample.put(sampleName,pileup); - - size += pileup.size(); - nDeletions += pileup.getNumberOfDeletions(); - nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads(); - } - - @Override - public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() { - if ( getNumberOfMappingQualityZeroReads() > 0 ) { - SampleSplitReadBackedExtendedEventPileup filteredPileup = new SampleSplitReadBackedExtendedEventPileup(loc); - - for (Map.Entry entry: pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedExtendedEventPileup pileup = entry.getValue(); - filteredPileup.addPileup(sampleName,pileup.getPileupWithoutMappingQualityZeroReads()); - } - - return filteredPileup; - } else { - return this; - } - } - - @Override - public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) { - SampleSplitReadBackedExtendedEventPileup filteredPileup = new SampleSplitReadBackedExtendedEventPileup(loc); - - for (Map.Entry entry: pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedExtendedEventPileup pileup = entry.getValue(); - filteredPileup.addPileup(sampleName,pileup.getMappingFilteredPileup(minMapQ)); - } - - return filteredPileup; - } - - /** - * Returns a pileup randomly downsampled to the desiredCoverage. - * - * @param desiredCoverage - * @return - */ - @Override - public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) { - if ( size() <= desiredCoverage ) - return this; - - // randomly choose numbers corresponding to positions in the reads list - Random generator = new Random(); - TreeSet positions = new TreeSet(); - for ( int i = 0; i < desiredCoverage; /* no update */ ) { - if ( positions.add(generator.nextInt(size)) ) - i++; - } - - SampleSplitReadBackedExtendedEventPileup downsampledPileup = new SampleSplitReadBackedExtendedEventPileup(getLocation()); - int current = 0; - - for(Map.Entry entry: this.pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedExtendedEventPileup pileup = entry.getValue(); - - List filteredPileup = new ArrayList(); - for(ExtendedEventPileupElement p: pileup) { - if(positions.contains(current)) - filteredPileup.add(p); - } - - if(!filteredPileup.isEmpty()) - downsampledPileup.addPileup(sampleName,new UnifiedReadBackedExtendedEventPileup(loc,filteredPileup)); - - current++; - } - - return downsampledPileup; - } - - /** - * Gets a list of all the samples stored in this pileup. - * @return List of samples in this pileup. - */ - public Collection getSamples() { - return pileupBySample.keySet(); - } - - /** - * Gets the particular subset of this pileup with the given sample name. - * @param sampleName Name of the sample to use. - * @return A subset of this pileup containing only reads with the given sample. - */ - public ReadBackedExtendedEventPileup getPileupForSample(String sampleName) { - return pileupBySample.get(sampleName); - } - - - @Override - public Iterator iterator() { - return new ExtendedEventCastingIterator(new MergingPileupElementIterator(pileupBySample)); - } - - /** - * Returns the number of deletion events in this pileup - * - * @return - */ - @Override - public int getNumberOfDeletions() { - return nDeletions; - } - - /** - * Returns the number of insertion events in this pileup - * - * @return - */ - @Override - public int getNumberOfInsertions() { - return nInsertions; - } - - /** Returns the length of the longest deletion observed at the site this - * pileup is associated with (NOTE: by convention, both insertions and deletions - * are associated with genomic location immediately before the actual event). If - * there are no deletions at the site, returns 0. - * @return - */ - @Override - public int getMaxDeletionLength() { - int maxDeletionLength = 0; - for(ReadBackedExtendedEventPileup pileup: pileupBySample.values()) - maxDeletionLength = Math.max(maxDeletionLength,pileup.getMaxDeletionLength()); - return maxDeletionLength; - } - - /** - * Returns the number of mapping quality zero reads in this pileup. - * @return - */ - @Override - public int getNumberOfMappingQualityZeroReads() { - return nMQ0Reads; - } - - /** - * @return the number of elements in this pileup - */ - @Override - public int size() { - return size; - } - - /** - * @return the location of this pileup - */ - @Override - public GenomeLoc getLocation() { - return loc; - } - - @Override - public String getShortPileupString() { - // In the pileup format, each extended event line has genomic position (chromosome name and offset), - // reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing - // insertion, deletion or no-event, respectively. - return String.format("%s %s E %s", - getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - new String(getEvents()) ); - } - - // -------------------------------------------------------- - // - // Convenience functions that may be slow - // - // -------------------------------------------------------- - - /** - * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getReads() { - List reads = new ArrayList(size()); - for ( ExtendedEventPileupElement pile : this ) { reads.add(pile.getRead()); } - return reads; - } - - /** - * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getOffsets() { - List offsets = new ArrayList(size()); - for ( ExtendedEventPileupElement pile : this ) { offsets.add(pile.getOffset()); } - return offsets; - } - - /** - * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getEvents() { - byte[] v = new byte[size()]; - int i = 0; - for ( ExtendedEventPileupElement e : this ) { - switch ( e.getType() ) { - case INSERTION: v[i] = 'I'; break; - case DELETION: v[i] = 'D'; break; - case NOEVENT: v[i] = '.'; break; - default: throw new StingException("Unknown event type encountered: "+e.getType()); - } - i++; - } - return v; - } - - /** A shortcut for getEventStringsWithCounts(null); - * - * @return - */ - @Override - public List> getEventStringsWithCounts() { - return getEventStringsWithCounts(null); - } - - /** Returns String representation of all distinct extended events (indels) at the site along with - * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for - * deletions will be generated as "D" (i.e. "5D"); if the reference bases are provided, the actual - * deleted sequence will be used in the string representation (e.g. "-AAC"). - * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and - * extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+) - * @return list of distinct events; first element of a pair is a string representation of the event, second element - * gives the number of reads, in which that event was observed - */ - @Override - public List> getEventStringsWithCounts(byte[] refBases) { - Map events = new HashMap(); - - for ( ExtendedEventPileupElement e : this ) { - Integer cnt; - String indel = null; - switch ( e.getType() ) { - case INSERTION: - indel = "+"+e.getEventBases(); - break; - case DELETION: - indel = UnifiedReadBackedExtendedEventPileup.getDeletionString(e.getEventLength(),refBases); - break; - case NOEVENT: continue; - default: throw new StingException("Unknown event type encountered: "+e.getType()); - } - - cnt = events.get(indel); - if ( cnt == null ) events.put(indel,1); - else events.put(indel,cnt.intValue()+1); - } - - List> eventList = new ArrayList>(events.size()); - for ( Map.Entry m : events.entrySet() ) { - eventList.add( new Pair(m.getKey(),m.getValue())); - } - return eventList; - } - - /** - * Get an array of the mapping qualities - * @return - */ - @Override - public byte[] getMappingQuals() { - byte[] v = new byte[size()]; - int i = 0; - for ( ExtendedEventPileupElement e : this ) { v[i++] = (byte)e.getRead().getMappingQuality(); } - return v; - } - - private class ExtendedEventCastingIterator implements Iterator { - private final Iterator wrappedIterator; - - public ExtendedEventCastingIterator(Iterator iterator) { - this.wrappedIterator = iterator; - } - - public boolean hasNext() { return wrappedIterator.hasNext(); } - public ExtendedEventPileupElement next() { return (ExtendedEventPileupElement)wrappedIterator.next(); } - public void remove() { wrappedIterator.remove(); } - } - -} diff --git a/java/src/org/broadinstitute/sting/utils/pileup/SampleSplitReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/SampleSplitReadBackedPileup.java deleted file mode 100644 index 29f34dc00..000000000 --- a/java/src/org/broadinstitute/sting/utils/pileup/SampleSplitReadBackedPileup.java +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.pileup; - -import net.sf.picard.util.PeekableIterator; -import net.sf.samtools.SAMRecord; - -import java.util.*; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.gatk.iterators.IterableIterator; - -/** - * A read-backed pileup that internally divides the dataset based - * on sample, for super efficient per-sample access. - * - * TODO: there are a few functions that are shared between UnifiedReadBackedPileup and SampleSplitReadBackedPileup. - * TODO: refactor these into a common class. - * - * @author mhanna - * @version 0.1 - */ -public class SampleSplitReadBackedPileup implements ReadBackedPileup { - private GenomeLoc loc = null; - private Map pileupBySample = null; - - private int size = 0; // cached value of the size of the pileup - private int nDeletions = 0; // cached value of the number of deletions - private int nMQ0Reads = 0; // cached value of the number of MQ0 reads - - public SampleSplitReadBackedPileup(GenomeLoc loc) { - this(loc,new HashMap()); - } - - /** - * Optimization of above constructor where all of the cached data is provided - * @param loc Location of this pileup. - * @param pileup Pileup data, split by sample name. - */ - public SampleSplitReadBackedPileup(GenomeLoc loc, Map pileup) { - if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup"); - if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup"); - - this.loc = loc; - this.pileupBySample = pileup; - for(ReadBackedPileup pileupBySample: pileup.values()) { - this.size += pileupBySample.size(); - this.nDeletions += pileupBySample.getNumberOfDeletions(); - this.nMQ0Reads += pileupBySample.getNumberOfMappingQualityZeroReads(); - } - } - - private void addPileup(final String sampleName, ReadBackedPileup pileup) { - pileupBySample.put(sampleName,pileup); - - size += pileup.size(); - nDeletions += pileup.getNumberOfDeletions(); - nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads(); - } - - /** - * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this - * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy - * of the pileup (just returns this) if there are no deletions in the pileup. - * - * @return - */ - @Override - public ReadBackedPileup getPileupWithoutDeletions() { - if ( getNumberOfDeletions() > 0 ) { - SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc); - - for (Map.Entry entry: pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedPileup pileup = entry.getValue(); - filteredPileup.addPileup(sampleName,pileup.getPileupWithoutDeletions()); - } - - return filteredPileup; - } else { - return this; - } - } - - /** - * Returns a new ReadBackedPileup where only one read from an overlapping read - * pair is retained. If the two reads in question disagree to their basecall, - * neither read is retained. If they agree on the base, the read with the higher - * quality observation is retained - * - * @return the newly filtered pileup - */ - @Override - public ReadBackedPileup getOverlappingFragmentFilteredPileup() { - SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc); - - for (Map.Entry entry: pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedPileup pileup = entry.getValue(); - filteredPileup.addPileup(sampleName,pileup.getOverlappingFragmentFilteredPileup()); - } - - return filteredPileup; - } - - /** - * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this - * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy - * of the pileup (just returns this) if there are no MQ0 reads in the pileup. - * - * @return - */ - public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() { - SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc); - - for (Map.Entry entry: pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedPileup pileup = entry.getValue(); - filteredPileup.addPileup(sampleName,pileup.getPileupWithoutMappingQualityZeroReads()); - } - - return filteredPileup; - } - - /** - * Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from - * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. - * @param minBaseQ - * @param minMapQ - * @return - */ - @Override - public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) { - SampleSplitReadBackedPileup filteredPileup = new SampleSplitReadBackedPileup(loc); - - for (Map.Entry entry: pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedPileup pileup = entry.getValue(); - filteredPileup.addPileup(sampleName,pileup.getBaseAndMappingFilteredPileup(minBaseQ,minMapQ)); - } - - return filteredPileup; - } - - /** Returns subset of this pileup that contains only bases with quality >= minBaseQ. - * This method allocates and returns a new instance of ReadBackedPileup. - * @param minBaseQ - * @return - */ - @Override - public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) { - return getBaseAndMappingFilteredPileup(minBaseQ, -1); - } - - /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. - * This method allocates and returns a new instance of ReadBackedPileup. - * @param minMapQ - * @return - */ - @Override - public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) { - return getBaseAndMappingFilteredPileup(-1, minMapQ); - } - - /** - * Returns a pileup randomly downsampled to the desiredCoverage. - * - * @param desiredCoverage - * @return - */ - @Override - public ReadBackedPileup getDownsampledPileup(int desiredCoverage) { - if ( size() <= desiredCoverage ) - return this; - - // randomly choose numbers corresponding to positions in the reads list - Random generator = new Random(); - TreeSet positions = new TreeSet(); - for ( int i = 0; i < desiredCoverage; /* no update */ ) { - if ( positions.add(generator.nextInt(size)) ) - i++; - } - - SampleSplitReadBackedPileup downsampledPileup = new SampleSplitReadBackedPileup(getLocation()); - int current = 0; - - for(Map.Entry entry: this.pileupBySample.entrySet()) { - String sampleName = entry.getKey(); - ReadBackedPileup pileup = entry.getValue(); - - List filteredPileup = new ArrayList(); - for(PileupElement p: pileup) { - if(positions.contains(current)) - filteredPileup.add(p); - } - - if(!filteredPileup.isEmpty()) - downsampledPileup.addPileup(sampleName,new UnifiedReadBackedPileup(loc,filteredPileup)); - - current++; - } - - return downsampledPileup; - } - - public Collection getSamples() { - return pileupBySample.keySet(); - } - - @Override - public ReadBackedPileup getPileupForSample(String sampleName) { - return pileupBySample.containsKey(sampleName) ? pileupBySample.get(sampleName) : null; - } - - // -------------------------------------------------------- - // - // iterators - // - // -------------------------------------------------------- - - /** - * The best way to access PileupElements where you only care about the bases and quals in the pileup. - * - * for (PileupElement p : this) { doSomething(p); } - * - * Provides efficient iteration of the data. - * - * @return - */ - @Override - public Iterator iterator() { - return new MergingPileupElementIterator(pileupBySample); - } - - // todo -- why is this public? - @Override - public IterableIterator extendedForeachIterator() { - ArrayList x = new ArrayList(size()); - int i = 0; - Iterator iterator = new MergingPileupElementIterator(pileupBySample); - while(iterator.hasNext()) { - PileupElement pile = iterator.next(); - x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this)); - } - - return new IterableIterator(x.iterator()); - } - - /** - * Simple useful routine to count the number of deletion bases in this pileup - * - * @return - */ - @Override - public int getNumberOfDeletions() { - return nDeletions; - } - - @Override - public int getNumberOfMappingQualityZeroReads() { - return nMQ0Reads; - } - - /** - * @return the number of elements in this pileup - */ - @Override - public int size() { - return size; - } - - /** - * @return the location of this pileup - */ - @Override - public GenomeLoc getLocation() { - return loc; - } - - /** - * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according - * to BaseUtils.simpleBaseToBaseIndex for each base. - * - * @return - */ - @Override - public int[] getBaseCounts() { - int[] counts = new int[4]; - for(ReadBackedPileup pileup: pileupBySample.values()) { - int[] countsBySample = pileup.getBaseCounts(); - for(int i = 0; i < counts.length; i++) - counts[i] += countsBySample[i]; - } - return counts; - } - - /** - * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated - * @return - */ - @Override - public boolean hasSecondaryBases() { - boolean hasSecondaryBases = false; - for(ReadBackedPileup pileup: pileupBySample.values()) - hasSecondaryBases |= pileup.hasSecondaryBases(); - return hasSecondaryBases; - } - - @Override - public String getPileupString(char ref) { - // In the pileup format, each line represents a genomic position, consisting of chromosome name, - // coordinate, reference base, read bases, read qualities and alignment mapping qualities. - return String.format("%s %s %c %s %s", - getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - ref, // reference base - new String(getBases()), - getQualsString()); - } - - // -------------------------------------------------------- - // - // Convenience functions that may be slow - // - // -------------------------------------------------------- - - /** - * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getReads() { - List reads = new ArrayList(size()); - for ( PileupElement pile : this ) { reads.add(pile.getRead()); } - return reads; - } - - /** - * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getOffsets() { - List offsets = new ArrayList(size()); - for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } - return offsets; - } - - /** - * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getBases() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); } - return v; - } - - /** - * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getSecondaryBases() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); } - return v; - } - - /** - * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getQuals() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); } - return v; - } - - /** - * Get an array of the mapping qualities - * @return - */ - @Override - public byte[] getMappingQuals() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); } - return v; - } - - private String getQualsString() { - return UnifiedReadBackedPileup.quals2String(getQuals()); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/pileup/UnifiedReadBackedExtendedEventPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/UnifiedReadBackedExtendedEventPileup.java deleted file mode 100644 index 6f0b4af79..000000000 --- a/java/src/org/broadinstitute/sting/utils/pileup/UnifiedReadBackedExtendedEventPileup.java +++ /dev/null @@ -1,448 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.pileup; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.collections.Pair; - -import java.util.*; - -import net.sf.samtools.SAMRecord; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Dec 29, 2009 - * Time: 12:25:55 PM - * To change this template use File | Settings | File Templates. - */ -public class UnifiedReadBackedExtendedEventPileup implements ReadBackedExtendedEventPileup { - private GenomeLoc loc = null; - private List pileup = null; - - private int size = 0; // cached value of the size of the pileup - private int maxDeletionLength = 0; // cached value of the length of the longest deletion observed at the site - private int nDeletions = 0; // cached value of the number of deletions - private int nInsertions = 0; - private int nMQ0Reads = 0; // cached value of the number of MQ0 reads - - /** - * Create a new version of a read backed pileup at loc without any aligned reads - * - */ - public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc ) { - this(loc, new ArrayList(0)); - } - - /** - * Create a new version of a read backed pileup at loc, using the reads and their corresponding - * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a - * pointer to pileup. Don't go changing the data in pileup. - * - */ - public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List pileup ) { - if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup"); - if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup"); - - this.loc = loc; - this.pileup = pileup; - - calculatedCachedData(); - } - - /** - * Optimization of above constructor where all of the cached data is provided - * @param loc - * @param pileup - */ - public UnifiedReadBackedExtendedEventPileup(GenomeLoc loc, List pileup, int size, - int maxDeletionLength, int nInsertions, int nDeletions, int nMQ0Reads ) { - if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedExtendedEventPileup"); - if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedExtendedEventPileup"); - - this.loc = loc; - this.pileup = pileup; - this.size = size; - this.maxDeletionLength = maxDeletionLength; - this.nDeletions = nDeletions; - this.nInsertions = nInsertions; - this.nMQ0Reads = nMQ0Reads; - } - - - /** - * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, - * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling - * sizes, nDeletion, etc. over and over potentially. - */ - private void calculatedCachedData() { - size = 0; - nDeletions = 0; - nInsertions = 0; - nMQ0Reads = 0; - - for ( ExtendedEventPileupElement p : this ) { - - size++; - if ( p.isDeletion() ) { - nDeletions++; - maxDeletionLength = Math.max(maxDeletionLength, p.getEventLength()); - } else { - if ( p.isInsertion() ) nInsertions++; - } - - if ( p.getRead().getMappingQuality() == 0 ) { - nMQ0Reads++; - } - } - } - - - // -------------------------------------------------------- - // - // Special 'constructors' - // - // -------------------------------------------------------- - - - /** - * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this - * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy - * of the pileup (just returns this) if there are no MQ0 reads in the pileup. - * - * @return - */ - @Override - public ReadBackedExtendedEventPileup getPileupWithoutMappingQualityZeroReads() { - - if ( getNumberOfMappingQualityZeroReads() > 0 ) { - ArrayList filteredPileup = new ArrayList(); - for ( ExtendedEventPileupElement p : pileup ) { - if ( p.getRead().getMappingQuality() > 0 ) { - filteredPileup.add(p); - } - } - return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup); - } else { - return this; - } - } - - @Override - public ReadBackedExtendedEventPileup getMappingFilteredPileup( int minMapQ ) { - ArrayList filteredPileup = new ArrayList(); - - for ( ExtendedEventPileupElement p : pileup ) { - if ( p.getRead().getMappingQuality() >= minMapQ ) { - filteredPileup.add(p); - } - } - - return new UnifiedReadBackedExtendedEventPileup(loc, filteredPileup); - } - - /** - * Returns a pileup randomly downsampled to the desiredCoverage. - * - * @param desiredCoverage - * @return - */ - @Override - public ReadBackedExtendedEventPileup getDownsampledPileup(int desiredCoverage) { - if ( size() <= desiredCoverage ) - return this; - - // randomly choose numbers corresponding to positions in the reads list - Random generator = new Random(); - TreeSet positions = new TreeSet(); - for ( int i = 0; i < desiredCoverage; /* no update */ ) { - if ( positions.add(generator.nextInt(pileup.size())) ) - i++; - } - - Iterator positionIter = positions.iterator(); - ArrayList downsampledPileup = new ArrayList(); - - while ( positionIter.hasNext() ) { - int nextReadToKeep = (Integer)positionIter.next(); - downsampledPileup.add(pileup.get(nextReadToKeep)); - } - - return new UnifiedReadBackedExtendedEventPileup(getLocation(), downsampledPileup); - } - - @Override - public Collection getSamples() { - Collection sampleNames = new HashSet(); - for(PileupElement p: this) { - SAMRecord read = p.getRead(); - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - sampleNames.add(sampleName); - } - return sampleNames; - } - - @Override - public ReadBackedExtendedEventPileup getPileupForSample(String sampleName) { - List filteredPileup = new ArrayList(); - for(ExtendedEventPileupElement p: this) { - SAMRecord read = p.getRead(); - if(sampleName != null) { - if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) - filteredPileup.add(p); - } - else { - if(read.getReadGroup() == null || read.getReadGroup().getSample() == null) - filteredPileup.add(p); - } - } - return filteredPileup.size()>0 ? new UnifiedReadBackedExtendedEventPileup(loc,filteredPileup) : null; - } - - // -------------------------------------------------------- - // - // iterators - // - // -------------------------------------------------------- - - /** - * The best way to access PileupElements where you only care about the bases and quals in the pileup. - * - * for (PileupElement p : this) { doSomething(p); } - * - * Provides efficient iteration of the data. - * - * @return - */ - @Override - public Iterator iterator() { - return pileup.iterator(); - } - - - /** - * Returns the number of deletion events in this pileup - * - * @return - */ - @Override - public int getNumberOfDeletions() { - return nDeletions; - } - - /** - * Returns the number of insertion events in this pileup - * - * @return - */ - @Override - public int getNumberOfInsertions() { - return nInsertions; - } - - - /** Returns the length of the longest deletion observed at the site this - * pileup is associated with (NOTE: by convention, both insertions and deletions - * are associated with genomic location immediately before the actual event). If - * there are no deletions at the site, returns 0. - * @return - */ - @Override - public int getMaxDeletionLength() { - return maxDeletionLength; - } - /** - * Returns the number of mapping quality zero reads in this pileup. - * @return - */ - @Override - public int getNumberOfMappingQualityZeroReads() { - return nMQ0Reads; - } - -// public int getNumberOfDeletions() { -// int n = 0; -// -// for ( int i = 0; i < size(); i++ ) { -// if ( getOffsets().get(i) != -1 ) { n++; } -// } -// -// return n; -// } - - /** - * @return the number of elements in this pileup - */ - @Override - public int size() { - return size; - } - - /** - * @return the location of this pileup - */ - @Override - public GenomeLoc getLocation() { - return loc; - } - - @Override - public String getShortPileupString() { - // In the pileup format, each extended event line has genomic position (chromosome name and offset), - // reference "base" (always set to "E" for E(xtended)), then 'I','D', or '.' symbol for each read representing - // insertion, deletion or no-event, respectively. - return String.format("%s %s E %s", - getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - new String(getEvents()) ); - } - - - // -------------------------------------------------------- - // - // Convenience functions that may be slow - // - // -------------------------------------------------------- - - /** - * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getReads() { - List reads = new ArrayList(size()); - for ( ExtendedEventPileupElement pile : this ) { reads.add(pile.getRead()); } - return reads; - } - - /** - * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getOffsets() { - List offsets = new ArrayList(size()); - for ( ExtendedEventPileupElement pile : this ) { offsets.add(pile.getOffset()); } - return offsets; - } - - /** - * Returns an array of the events in this pileup ('I', 'D', or '.'). Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getEvents() { - byte[] v = new byte[size()]; - int i = 0; - for ( ExtendedEventPileupElement e : this ) { - switch ( e.getType() ) { - case INSERTION: v[i] = 'I'; break; - case DELETION: v[i] = 'D'; break; - case NOEVENT: v[i] = '.'; break; - default: throw new StingException("Unknown event type encountered: "+e.getType()); - } - i++; - } - return v; - } - - /** A shortcut for getEventStringsWithCounts(null); - * - * @return - */ - @Override - public List> getEventStringsWithCounts() { - return getEventStringsWithCounts(null); - } - - /** Returns String representation of all distinct extended events (indels) at the site along with - * observation counts (numbers of reads) for each distinct event. If refBases is null, a simple string representation for - * deletions will be generated as "D" (i.e. "5D"); if the reference bases are provided, the actual - * deleted sequence will be used in the string representation (e.g. "-AAC"). - * @param refBases reference bases, starting with the current locus (i.e. the one immediately before the indel), and - * extending far enough to accomodate the longest deletion (i.e. size of refBases must be at least 1+) - * @return list of distinct events; first element of a pair is a string representation of the event, second element - * gives the number of reads, in which that event was observed - */ - @Override - public List> getEventStringsWithCounts(byte[] refBases) { - Map events = new HashMap(); - - for ( ExtendedEventPileupElement e : this ) { - Integer cnt; - String indel = null; - switch ( e.getType() ) { - case INSERTION: - indel = "+"+e.getEventBases(); - break; - case DELETION: - indel = getDeletionString(e.getEventLength(),refBases); - break; - case NOEVENT: continue; - default: throw new StingException("Unknown event type encountered: "+e.getType()); - } - - cnt = events.get(indel); - if ( cnt == null ) events.put(indel,1); - else events.put(indel,cnt.intValue()+1); - } - - List> eventList = new ArrayList>(events.size()); - for ( Map.Entry m : events.entrySet() ) { - eventList.add( new Pair(m.getKey(),m.getValue())); - } - return eventList; - } - - /** - * Builds string representation of the deletion event. If refBases is null, the representation will be - * "D" (e.g. "5D"); if the reference bases are provided, a verbose representation (e.g. "-AAC") - * will be generated. NOTE: refBases must start with the base prior to the actual deletion (i.e. deleted - * base(s) are refBase[1], refBase[2], ...), and the length of the passed array must be sufficient to accomodate the - * deletion length (i.e. size of refBase must be at least length+1). - * @param length - * @param refBases - * @return - */ - static String getDeletionString(int length, byte[] refBases) { - if ( refBases == null ) { - return Integer.toString(length)+"D"; // if we do not have reference bases, we can only report something like "5D" - } else { - return "-"+new String(refBases,1,length).toUpperCase(); - } - } - - /** - * Get an array of the mapping qualities - * @return - */ - @Override - public byte[] getMappingQuals() { - byte[] v = new byte[size()]; - int i = 0; - for ( ExtendedEventPileupElement e : this ) { v[i++] = (byte)e.getRead().getMappingQuality(); } - return v; - } -} diff --git a/java/src/org/broadinstitute/sting/utils/pileup/UnifiedReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/UnifiedReadBackedPileup.java deleted file mode 100755 index ab0cf3287..000000000 --- a/java/src/org/broadinstitute/sting/utils/pileup/UnifiedReadBackedPileup.java +++ /dev/null @@ -1,561 +0,0 @@ -package org.broadinstitute.sting.utils.pileup; - -import org.broadinstitute.sting.gatk.iterators.IterableIterator; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.BaseUtils; - -import java.util.*; - -import net.sf.samtools.SAMRecord; - -/** - * Version two file implementing pileups of bases in reads at a locus. - * - * @author Mark DePristo - */ -public class UnifiedReadBackedPileup implements ReadBackedPileup { - private GenomeLoc loc = null; - private List pileup = null; - - private int size = 0; // cached value of the size of the pileup - private int nDeletions = 0; // cached value of the number of deletions - private int nMQ0Reads = 0; // cached value of the number of MQ0 reads - - /** - * Create a new version of a read backed pileup at loc, using the reads and their corresponding - * offsets. This pileup will contain a list, in order of the reads, of the piled bases at - * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to - * go changing the reads. - * - * @param loc - * @param reads - * @param offsets - */ - public UnifiedReadBackedPileup(GenomeLoc loc, List reads, List offsets ) { - this(loc, readsOffsets2Pileup(reads, offsets)); - } - - public UnifiedReadBackedPileup(GenomeLoc loc, List reads, int offset ) { - this(loc, readsOffsets2Pileup(reads, offset)); - } - - /** - * Create a new version of a read backed pileup at loc without any aligned reads - * - */ - public UnifiedReadBackedPileup(GenomeLoc loc ) { - this(loc, new ArrayList(0)); - } - - /** - * Create a new version of a read backed pileup at loc, using the reads and their corresponding - * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a - * pointer to pileup. Don't go changing the data in pileup. - * - */ - public UnifiedReadBackedPileup(GenomeLoc loc, List pileup ) { - if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup"); - if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup"); - - this.loc = loc; - this.pileup = pileup; - - calculatedCachedData(); - } - - /** - * Optimization of above constructor where all of the cached data is provided - * @param loc - * @param pileup - */ - public UnifiedReadBackedPileup(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads ) { - if ( loc == null ) throw new StingException("Illegal null genomeloc in UnifiedReadBackedPileup"); - if ( pileup == null ) throw new StingException("Illegal null pileup in UnifiedReadBackedPileup"); - - this.loc = loc; - this.pileup = pileup; - this.size = size; - this.nDeletions = nDeletions; - this.nMQ0Reads = nMQ0Reads; - } - - - /** - * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, - * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling - * sizes, nDeletion, etc. over and over potentially. - */ - private void calculatedCachedData() { - size = 0; - nDeletions = 0; - nMQ0Reads = 0; - - for ( PileupElement p : this ) { - size++; - if ( p.isDeletion() ) { - nDeletions++; - } - if ( p.getRead().getMappingQuality() == 0 ) { - nMQ0Reads++; - } - } - } - - - /** - * Helper routine for converting reads and offset lists to a PileupElement list. - * - * @param reads - * @param offsets - * @return - */ - private static ArrayList readsOffsets2Pileup(List reads, List offsets ) { - if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup"); - if ( offsets == null ) throw new StingException("Illegal null offsets list in UnifiedReadBackedPileup"); - if ( reads.size() != offsets.size() ) throw new StingException("Reads and offset lists have different sizes!"); - - ArrayList pileup = new ArrayList(reads.size()); - for ( int i = 0; i < reads.size(); i++ ) { - pileup.add( new PileupElement( reads.get(i), offsets.get(i) ) ); - } - - return pileup; - } - - /** - * Helper routine for converting reads and a single offset to a PileupElement list. - * - * @param reads - * @param offset - * @return - */ - private static ArrayList readsOffsets2Pileup(List reads, int offset ) { - if ( reads == null ) throw new StingException("Illegal null read list in UnifiedReadBackedPileup"); - if ( offset < 0 ) throw new StingException("Illegal offset < 0 UnifiedReadBackedPileup"); - - ArrayList pileup = new ArrayList(reads.size()); - for ( int i = 0; i < reads.size(); i++ ) { - pileup.add( new PileupElement( reads.get(i), offset ) ); - } - - return pileup; - } - - // -------------------------------------------------------- - // - // Special 'constructors' - // - // -------------------------------------------------------- - - /** - * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this - * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy - * of the pileup (just returns this) if there are no deletions in the pileup. - * - * @return - */ - @Override - public ReadBackedPileup getPileupWithoutDeletions() { - if ( getNumberOfDeletions() > 0 ) { - ArrayList filteredPileup = new ArrayList(); - - for ( PileupElement p : pileup ) { - if ( !p.isDeletion() ) { - filteredPileup.add(p); - } - } - return new UnifiedReadBackedPileup(loc, filteredPileup); - } else { - return this; - } - } - - /** - * Returns a new ReadBackedPileup where only one read from an overlapping read - * pair is retained. If the two reads in question disagree to their basecall, - * neither read is retained. If they agree on the base, the read with the higher - * quality observation is retained - * - * @return the newly filtered pileup - */ - @Override - public ReadBackedPileup getOverlappingFragmentFilteredPileup() { - Map filteredPileup = new HashMap(); - - for ( PileupElement p : pileup ) { - String readName = p.getRead().getReadName(); - - // if we've never seen this read before, life is good - if (!filteredPileup.containsKey(readName)) { - filteredPileup.put(readName, p); - } else { - PileupElement existing = filteredPileup.get(readName); - - // if the reads disagree at this position, throw them both out. Otherwise - // keep the element with the higher quality score - if (existing.getBase() != p.getBase()) { - filteredPileup.remove(readName); - } else { - if (existing.getQual() < p.getQual()) { - filteredPileup.put(readName, p); - } - } - } - } - - return new UnifiedReadBackedPileup(loc, new ArrayList(filteredPileup.values())); - } - - /** - * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this - * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy - * of the pileup (just returns this) if there are no MQ0 reads in the pileup. - * - * @return - */ - @Override - public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() { - - if ( getNumberOfMappingQualityZeroReads() > 0 ) { - ArrayList filteredPileup = new ArrayList(); - for ( PileupElement p : pileup ) { - if ( p.getRead().getMappingQuality() > 0 ) { - filteredPileup.add(p); - } - } - return new UnifiedReadBackedPileup(loc, filteredPileup); - } else { - return this; - } - } - - /** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from - * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. - * @param minBaseQ - * @param minMapQ - * @return - */ - @Override - public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ ) { - ArrayList filteredPileup = new ArrayList(); - - for ( PileupElement p : pileup ) { - if ( p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ) ) { - filteredPileup.add(p); - } - } - - return new UnifiedReadBackedPileup(loc, filteredPileup); - } - - /** Returns subset of this pileup that contains only bases with quality >= minBaseQ. - * This method allocates and returns a new instance of ReadBackedPileup. - * @param minBaseQ - * @return - */ - @Override - public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) { - return getBaseAndMappingFilteredPileup(minBaseQ, -1); - } - - /** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. - * This method allocates and returns a new instance of ReadBackedPileup. - * @param minMapQ - * @return - */ - @Override - public ReadBackedPileup getMappingFilteredPileup( int minMapQ ) { - return getBaseAndMappingFilteredPileup(-1, minMapQ); - } - - /** - * Returns a pileup randomly downsampled to the desiredCoverage. - * - * @param desiredCoverage - * @return - */ - @Override - public ReadBackedPileup getDownsampledPileup(int desiredCoverage) { - if ( size() <= desiredCoverage ) - return this; - - // randomly choose numbers corresponding to positions in the reads list - Random generator = new Random(); - TreeSet positions = new TreeSet(); - for ( int i = 0; i < desiredCoverage; /* no update */ ) { - if ( positions.add(generator.nextInt(pileup.size())) ) - i++; - } - - Iterator positionIter = positions.iterator(); - ArrayList downsampledPileup = new ArrayList(); - - while ( positionIter.hasNext() ) { - int nextReadToKeep = (Integer)positionIter.next(); - downsampledPileup.add(pileup.get(nextReadToKeep)); - } - - return new UnifiedReadBackedPileup(getLocation(), downsampledPileup); - } - - @Override - public Collection getSamples() { - Collection sampleNames = new HashSet(); - for(PileupElement p: this) { - SAMRecord read = p.getRead(); - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - sampleNames.add(sampleName); - } - return sampleNames; - } - - @Override - public ReadBackedPileup getPileupForSample(String sampleName) { - List filteredPileup = new ArrayList(); - for(PileupElement p: this) { - SAMRecord read = p.getRead(); - if(sampleName != null) { - if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) - filteredPileup.add(p); - } - else { - if(read.getReadGroup() == null || read.getReadGroup().getSample() == null) - filteredPileup.add(p); - } - } - return filteredPileup.size()>0 ? new UnifiedReadBackedPileup(loc,filteredPileup) : null; - } - - // -------------------------------------------------------- - // - // iterators - // - // -------------------------------------------------------- - - /** - * The best way to access PileupElements where you only care about the bases and quals in the pileup. - * - * for (PileupElement p : this) { doSomething(p); } - * - * Provides efficient iteration of the data. - * - * @return - */ - @Override - public Iterator iterator() { - return pileup.iterator(); - } - - - /** - * The best way to access PileupElements where you only care not only about bases and quals in the pileup - * but also need access to the index of the pileup element in the pile. - * - * for (ExtendedPileupElement p : this) { doSomething(p); } - * - * Provides efficient iteration of the data. - * - * @return - */ - - // todo -- reimplement efficiently - // todo -- why is this public? - @Override - public IterableIterator extendedForeachIterator() { - ArrayList x = new ArrayList(size()); - int i = 0; - for ( PileupElement pile : this ) { - x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this)); - } - - return new IterableIterator(x.iterator()); - } - - /** - * Simple useful routine to count the number of deletion bases in this pileup - * - * @return - */ - @Override - public int getNumberOfDeletions() { - return nDeletions; - } - - @Override - public int getNumberOfMappingQualityZeroReads() { - return nMQ0Reads; - } - - /** - * @return the number of elements in this pileup - */ - @Override - public int size() { - return size; - } - - /** - * @return the location of this pileup - */ - @Override - public GenomeLoc getLocation() { - return loc; - } - - /** - * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according - * to BaseUtils.simpleBaseToBaseIndex for each base. - * - * @return - */ - @Override - public int[] getBaseCounts() { - int[] counts = new int[4]; - for ( PileupElement pile : this ) { - // skip deletion sites - if ( ! pile.isDeletion() ) { - int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase()); - if (index != -1) - counts[index]++; - } - } - - return counts; - } - - /** - * Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated - * @return - */ - @Override - public boolean hasSecondaryBases() { - for ( PileupElement pile : this ) { - // skip deletion sites - if ( ! pile.isDeletion() && BaseUtils.isRegularBase((char)pile.getSecondBase()) ) - return true; - } - - return false; - } - - @Override - public String getPileupString(char ref) { - // In the pileup format, each line represents a genomic position, consisting of chromosome name, - // coordinate, reference base, read bases, read qualities and alignment mapping qualities. - return String.format("%s %s %c %s %s", - getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - ref, // reference base - new String(getBases()), - getQualsString()); - } - - - // -------------------------------------------------------- - // - // Convenience functions that may be slow - // - // -------------------------------------------------------- - - /** - * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getReads() { - List reads = new ArrayList(size()); - for ( PileupElement pile : this ) { reads.add(pile.getRead()); } - return reads; - } - - /** - * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time - * @return - */ - @Override - public List getOffsets() { - List offsets = new ArrayList(size()); - for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); } - return offsets; - } - - /** - * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getBases() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); } - return v; - } - - /** - * Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getSecondaryBases() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); } - return v; - } - - /** - * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time - * @return - */ - @Override - public byte[] getQuals() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); } - return v; - } - - /** - * Get an array of the mapping qualities - * @return - */ - @Override - public byte[] getMappingQuals() { - byte[] v = new byte[size()]; - for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = (byte)pile.getRead().getMappingQuality(); } - return v; - } - - - // - // Private functions for printing pileups - // - private String getMappingQualsString() { - return quals2String(getMappingQuals()); - } - - static String quals2String( byte[] quals ) { - StringBuilder qualStr = new StringBuilder(); - for ( int qual : quals ) { - qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea - char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63 - qualStr.append(qualChar); - } - - return qualStr.toString(); - } - - private String getQualsString() { - return quals2String(getQuals()); - } - - @Override - public String toString() { - StringBuilder s = new StringBuilder(); - s.append(getLocation()); - s.append(": "); - - for ( PileupElement p : this ) { - s.append((char)p.getBase()); - } - - return s.toString(); - } -}