ReadBackedPileup in all its glory. Documented, aligned with the output of LocusIteratorByState, and caching common outputs for performance
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2165 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b44363d20a
commit
db40e28e54
|
|
@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.contexts;
|
|||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -42,8 +44,7 @@ import java.util.*;
|
|||
*/
|
||||
public class AlignmentContext {
|
||||
protected GenomeLoc loc = null;
|
||||
protected List<SAMRecord> reads = null;
|
||||
protected List<Integer> offsets = null;
|
||||
protected ReadBackedPileup pileup = null;
|
||||
|
||||
/**
|
||||
* The number of bases we've skipped over in the reference since the last map invocation.
|
||||
|
|
@ -65,31 +66,45 @@ public class AlignmentContext {
|
|||
* @param reads
|
||||
* @param offsets
|
||||
*/
|
||||
@Deprecated
|
||||
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
|
||||
//assert loc != null;
|
||||
//assert loc.getContig() != null;
|
||||
//assert reads != null;
|
||||
//assert offsets != null;
|
||||
|
||||
this.loc = loc;
|
||||
this.reads = reads;
|
||||
this.offsets = offsets;
|
||||
this(loc, reads, offsets, 0);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets, long skippedBases ) {
|
||||
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
|
||||
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
|
||||
|
||||
this.loc = loc;
|
||||
this.reads = reads;
|
||||
this.offsets = offsets;
|
||||
this.pileup = new ReadBackedPileup(loc, reads, offsets);
|
||||
this.skippedBases = skippedBases;
|
||||
}
|
||||
|
||||
public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup ) {
|
||||
this(loc, pileup, 0);
|
||||
}
|
||||
|
||||
|
||||
public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup, long skippedBases ) {
|
||||
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
|
||||
if ( pileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null");
|
||||
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
|
||||
|
||||
this.loc = loc;
|
||||
this.pileup = pileup;
|
||||
this.skippedBases = skippedBases;
|
||||
}
|
||||
|
||||
public ReadBackedPileup getPileup() { return pileup; }
|
||||
|
||||
/**
|
||||
* get all of the reads within this context
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads() { return reads; }
|
||||
@Deprecated
|
||||
public List<SAMRecord> getReads() { return pileup.getReads(); }
|
||||
|
||||
/**
|
||||
* Are there any reads associated with this locus?
|
||||
|
|
@ -97,16 +112,15 @@ public class AlignmentContext {
|
|||
* @return
|
||||
*/
|
||||
public boolean hasReads() {
|
||||
return reads != null;
|
||||
return pileup.size() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* How many reads cover this locus?
|
||||
* @return
|
||||
*/
|
||||
public int numReads() {
|
||||
assert( reads != null );
|
||||
return reads.size();
|
||||
public int size() {
|
||||
return pileup.size();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -114,89 +128,17 @@ public class AlignmentContext {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
public List<Integer> getOffsets() {
|
||||
return offsets;
|
||||
return pileup.getOffsets();
|
||||
}
|
||||
|
||||
public String getContig() { return getLocation().getContig(); }
|
||||
public long getPosition() { return getLocation().getStart(); }
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
//public void setLocation(GenomeLoc loc) {
|
||||
// this.loc = loc.clone();
|
||||
//}
|
||||
|
||||
public void downsampleToCoverage(int coverage) {
|
||||
if ( numReads() <= coverage )
|
||||
return;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet positions = new TreeSet();
|
||||
int i = 0;
|
||||
while ( i < coverage ) {
|
||||
if (positions.add(new Integer(generator.nextInt(reads.size()))))
|
||||
i++;
|
||||
}
|
||||
|
||||
ArrayList<SAMRecord> downsampledReads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> downsampledOffsets = new ArrayList<Integer>();
|
||||
Iterator positionIter = positions.iterator();
|
||||
Iterator<SAMRecord> readsIter = reads.iterator();
|
||||
Iterator<Integer> offsetsIter = offsets.iterator();
|
||||
int currentRead = 0;
|
||||
while ( positionIter.hasNext() ) {
|
||||
int nextReadToKeep = (Integer)positionIter.next();
|
||||
|
||||
// fast-forward to the right read
|
||||
while ( currentRead < nextReadToKeep ) {
|
||||
readsIter.next();
|
||||
offsetsIter.next();
|
||||
currentRead++;
|
||||
}
|
||||
|
||||
downsampledReads.add(readsIter.next());
|
||||
downsampledOffsets.add(offsetsIter.next());
|
||||
currentRead++;
|
||||
}
|
||||
|
||||
reads = downsampledReads;
|
||||
offsets = downsampledOffsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns only the reads in ac that do not contain spanning deletions of this locus
|
||||
*
|
||||
* @param ac
|
||||
* @return
|
||||
*/
|
||||
public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) {
|
||||
return subsetDeletions( ac, true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns only the reads in ac that do contain spanning deletions of this locus
|
||||
*
|
||||
* @param ac
|
||||
* @return
|
||||
*/
|
||||
public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) {
|
||||
return subsetDeletions( ac, false );
|
||||
}
|
||||
|
||||
private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) {
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(ac.getReads().size());
|
||||
ArrayList<Integer> offsets = new ArrayList<Integer>(ac.getReads().size());
|
||||
for ( int i = 0; i < ac.getReads().size(); i++ ) {
|
||||
SAMRecord read = ac.getReads().get(i);
|
||||
int offset = ac.getOffsets().get(i);
|
||||
if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) {
|
||||
reads.add(read);
|
||||
offsets.add(offset);
|
||||
}
|
||||
}
|
||||
|
||||
return new AlignmentContext(ac.getLocation(), reads, offsets);
|
||||
pileup = pileup.getDownsampledPileup(coverage);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -217,8 +219,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// printState();
|
||||
//}
|
||||
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
|
||||
ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
|
||||
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||
|
||||
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
||||
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
||||
|
|
@ -229,11 +230,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
for ( SAMRecordState state : readStates ) {
|
||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
||||
reads.add(state.getRead());
|
||||
offsets.add(state.getReadOffset());
|
||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
reads.add(state.getRead());
|
||||
offsets.add(-1);
|
||||
pile.add(new PileupElement(state.getRead(), -1));
|
||||
}
|
||||
}
|
||||
GenomeLoc loc = getLocation();
|
||||
|
|
@ -245,10 +244,49 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// printState();
|
||||
//}
|
||||
// if we got reads with non-D/N over the current position, we are done
|
||||
if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets);
|
||||
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile));
|
||||
}
|
||||
}
|
||||
|
||||
// old implementation -- uses lists of reads and offsets
|
||||
// public AlignmentContext next() {
|
||||
// //if (DEBUG) {
|
||||
// // logger.debug("in Next:");
|
||||
// // printState();
|
||||
// //}
|
||||
//
|
||||
// ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
|
||||
// ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
|
||||
//
|
||||
// // keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
||||
// // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
||||
// while(true) {
|
||||
// collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||
//
|
||||
// // todo -- performance problem -- should be lazy, really
|
||||
// for ( SAMRecordState state : readStates ) {
|
||||
// if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
//// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
||||
// reads.add(state.getRead());
|
||||
// offsets.add(state.getReadOffset());
|
||||
// } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
// reads.add(state.getRead());
|
||||
// offsets.add(-1);
|
||||
// }
|
||||
// }
|
||||
// GenomeLoc loc = getLocation();
|
||||
//
|
||||
// updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
//
|
||||
// //if (DEBUG) {
|
||||
// // logger.debug("DONE WITH NEXT, updating read states, current state is:");
|
||||
// // printState();
|
||||
// //}
|
||||
// // if we got reads with non-D/N over the current position, we are done
|
||||
// if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets);
|
||||
// }
|
||||
// }
|
||||
|
||||
private void collectPendingReads(final int maximumPileupSize) {
|
||||
//if (DEBUG) {
|
||||
// logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext()));
|
||||
|
|
|
|||
|
|
@ -10,8 +10,10 @@ import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -40,32 +42,32 @@ public class TraverseLocusWindows extends TraversalEngine {
|
|||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||
|
||||
AlignmentContext locus = getLocusContext(readView.iterator(), interval);
|
||||
Pair<GenomeLoc, List<SAMRecord>> locus = getLocusContext(readView.iterator(), interval);
|
||||
|
||||
// The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way.
|
||||
// TODO: Convert this approach to the standard.
|
||||
GenomeLoc expandedInterval = locus.getLocation();
|
||||
GenomeLoc expandedInterval = locus.getFirst();
|
||||
|
||||
String referenceSubsequence = new String(referenceView.getReferenceBases(expandedInterval));
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this interval
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getFirst());
|
||||
|
||||
//
|
||||
// Execute our contract with the walker. Call filter, map, and reduce
|
||||
//
|
||||
final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
|
||||
if (keepMeP) {
|
||||
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus);
|
||||
sum = locusWindowWalker.reduce(x, sum);
|
||||
}
|
||||
//final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
|
||||
//if (keepMeP) {
|
||||
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond());
|
||||
sum = locusWindowWalker.reduce(x, sum);
|
||||
//}
|
||||
|
||||
printProgress(LOCUS_WINDOW_STRING, locus.getLocation());
|
||||
printProgress(LOCUS_WINDOW_STRING, locus.getFirst());
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
private AlignmentContext getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
|
||||
private Pair<GenomeLoc, List<SAMRecord>> getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
|
||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
boolean done = false;
|
||||
long leftmostIndex = interval.getStart(),
|
||||
|
|
@ -86,11 +88,11 @@ public class TraverseLocusWindows extends TraversalEngine {
|
|||
}
|
||||
|
||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||
AlignmentContext locus = new AlignmentContext(window, reads, null);
|
||||
if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
|
||||
locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage());
|
||||
// AlignmentContext locus = new AlignmentContext(window, reads, null);
|
||||
// if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
|
||||
// locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage());
|
||||
|
||||
return locus;
|
||||
return new Pair<GenomeLoc, List<SAMRecord>>(window, reads);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -12,13 +16,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
*/
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
public abstract class LocusWindowWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, String ref, AlignmentContext context) {
|
||||
return true; // We are keeping all the intervals
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
public abstract MapType map(RefMetaDataTracker tracker, String ref, AlignmentContext context);
|
||||
public abstract MapType map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List<SAMRecord> reads);
|
||||
|
||||
// Given result of map function
|
||||
public abstract ReduceType reduceInit();
|
||||
|
|
|
|||
|
|
@ -66,14 +66,14 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
|
||||
String secondBasePileup = "";
|
||||
if(shouldShowSecondaryBasePileup(pileup))
|
||||
secondBasePileup = getSecondBasePileup(pileup);
|
||||
String rods = getReferenceOrderedData( tracker );
|
||||
|
||||
out.printf("%s%s %s%n", pileup.getPileupString(qualsAsInts), secondBasePileup, rods);
|
||||
out.printf("%s%s %s%n", pileup.getPileupString(ref.getBase(), qualsAsInts), secondBasePileup, rods);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
public boolean CONTINUE_AFTER_AN_ERROR = false;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
SAMPileupRecord truePileup = getTruePileup( tracker );
|
||||
|
||||
if ( truePileup == null ) {
|
||||
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(false));
|
||||
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(ref.getBase(), false));
|
||||
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
||||
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
|
||||
context.getLocation(), new String(pileup.getBases())));
|
||||
|
|
@ -37,7 +37,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
} else {
|
||||
String pileupDiff = pileupDiff(pileup, truePileup, true);
|
||||
if ( pileupDiff != null ) {
|
||||
out.printf("%s vs. %s%n", pileup.getPileupString(true), truePileup.getPileupString());
|
||||
out.printf("%s vs. %s%n", pileup.getPileupString(ref.getBase(), true), truePileup.getPileupString());
|
||||
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
||||
throw new RuntimeException(String.format("Pileups aren't equal: %s", pileupDiff));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
|
||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileupWithDel, Variation variation, List<Genotype> genotypes) {
|
||||
ReadBackedPileup pileup = pileupWithDel; // .getPileupWithoutDeletions();
|
||||
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes);
|
||||
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup,genotypes);
|
||||
if ( depthProp == null ) {
|
||||
return null;
|
||||
} else {
|
||||
|
|
@ -48,10 +48,10 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
|
||||
}
|
||||
|
||||
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(ReadBackedPileup p, List<Genotype> genotypes) {
|
||||
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(char ref, ReadBackedPileup p, List<Genotype> genotypes) {
|
||||
char snp;
|
||||
try {
|
||||
snp = getNonref(genotypes, p.getRef());
|
||||
snp = getNonref(genotypes, ref);
|
||||
} catch ( IllegalStateException e ) {
|
||||
// tri-allelic site
|
||||
// System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")");
|
||||
|
|
@ -67,7 +67,7 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
|
||||
if ( BaseUtils.isRegularBase((char)sbase) && BaseUtils.basesAreEqual(pbase, (byte) snp) ) {
|
||||
variantDepth++;
|
||||
if ( BaseUtils.basesAreEqual(sbase, (byte)p.getRef()) ) {
|
||||
if ( BaseUtils.basesAreEqual(sbase, (byte)ref) ) {
|
||||
variantsWithRefSecondBase++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -187,8 +187,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List<Genotype> genotypes, Collection<VariantAnnotation> annotations) {
|
||||
|
||||
// set up the pileup for the full collection of reads at this position
|
||||
ReadBackedPileup fullPileup = new ReadBackedPileup(ref.getBase(), context);
|
||||
ReadBackedPileup fullPileup = context.getPileup();
|
||||
|
||||
// todo -- reimplement directly using ReadBackedPileups, which is vastly more efficient
|
||||
// also, set up the pileup for the mapping-quality-zero-free context
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
|
|
@ -204,7 +205,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
MQ0freeOffsets.add(offset);
|
||||
}
|
||||
}
|
||||
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), MQ0freeReads, MQ0freeOffsets);
|
||||
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), MQ0freeReads, MQ0freeOffsets);
|
||||
|
||||
HashMap<String, String> results = new HashMap<String, String>();
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
int index = 0;
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
AlignmentContextBySample context = contexts.get(sample);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
|
|
@ -82,7 +82,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc);
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(char ref, AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) {
|
||||
// create the pileup
|
||||
AlignmentContext myContext = sampleContext.getContext(contextType);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, myContext);
|
||||
ReadBackedPileup pileup = myContext.getPileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
|
|
@ -132,7 +132,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
AlignmentContextBySample context = contexts.get(sample);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
|||
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||
|
||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||
|
||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
int altIndex = BaseUtils.simpleBaseToBaseIndex(alt);
|
||||
|
|
|
|||
|
|
@ -106,8 +106,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, String ref, AlignmentContext context) {
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
public Integer map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List<SAMRecord> reads) {
|
||||
ArrayList<SAMRecord> goodReads = new ArrayList<SAMRecord>();
|
||||
for ( SAMRecord read : reads ) {
|
||||
if ( !read.getReadUnmappedFlag() &&
|
||||
|
|
@ -121,7 +120,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
readsToWrite.add(new ComparableSAMRecord(read));
|
||||
}
|
||||
|
||||
clean(goodReads, ref, context.getLocation());
|
||||
clean(goodReads, ref, loc);
|
||||
//bruteForceClean(goodReads, ref, context.getLocation().getStart());
|
||||
//testCleanWithDeletion();
|
||||
//testCleanWithInsertion();
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
}
|
||||
|
||||
public Set<BaseTransitionTable> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(),context);
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
Set<BaseTransitionTable> newCounts = null;
|
||||
//System.out.println(pileup.getBases());
|
||||
if ( baseIsUsable(tracker, ref, pileup, context) ) {
|
||||
|
|
@ -279,7 +279,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
}
|
||||
|
||||
if ( usePileupMismatches ) {
|
||||
conditions.add(countMismatches(pileup));
|
||||
conditions.add(countMismatches(ref.getBase(), pileup));
|
||||
}
|
||||
|
||||
if ( useReadGroup ) {
|
||||
|
|
@ -331,8 +331,8 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
return String.format("%s\t%s%n",header,"Counts");
|
||||
}
|
||||
|
||||
public int countMismatches(ReadBackedPileup p) {
|
||||
int refM = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(p.getRef())];
|
||||
public int countMismatches(char ref, ReadBackedPileup p) {
|
||||
int refM = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||
return p.size()-refM;
|
||||
}
|
||||
|
||||
|
|
@ -345,7 +345,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
}
|
||||
|
||||
public boolean pileupBelowMismatchThreshold( ReferenceContext ref, ReadBackedPileup pileup ) {
|
||||
return countMismatches(pileup) <= maxNumMismatches;
|
||||
return countMismatches(ref.getBase(), pileup) <= maxNumMismatches;
|
||||
}
|
||||
|
||||
public boolean pileupContainsNoNs(ReadBackedPileup pileup) {
|
||||
|
|
|
|||
|
|
@ -318,7 +318,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
GenomeLoc Gloc = context.getLocation();
|
||||
|
||||
//Create pileup of reads at this locus
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
|
||||
long loc = context.getPosition();
|
||||
if( context.getReads().size() > 0 ) {
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
|
|||
}
|
||||
int numVariantAllele = alleleFreqInfo.getSecond().getFirst();
|
||||
int numChipsObserved = alleleFreqInfo.getSecond().getSecond();
|
||||
int depth = context.numReads();
|
||||
int depth = context.size();
|
||||
double power = powerWalker.calculatePowerAtFrequency(context,numVariantAllele);
|
||||
int called;
|
||||
Variation call = (Variation) tracker.lookup("calls",null);
|
||||
|
|
@ -89,7 +89,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
|
|||
called = 1;
|
||||
}
|
||||
|
||||
ReadBackedPileup p = new ReadBackedPileup(ref.getBase(),context);
|
||||
ReadBackedPileup p = context.getPileup();
|
||||
int support = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(alternate)];
|
||||
|
||||
// sanity check
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class MinimumNQSWalker extends LocusWalker<Pair<List<Pair<Integer,Integer
|
|||
ArrayList<Pair<Integer,Integer>> matchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
||||
ArrayList<Pair<Integer,Integer>> mismatchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
||||
if ( (Variation) tracker.lookup("dbsnp",null) == null ) {
|
||||
for ( int r = 0; r < context.numReads(); r ++ ) {
|
||||
for ( int r = 0; r < context.size(); r ++ ) {
|
||||
SAMRecord read = context.getReads().get(r);
|
||||
int offset = context.getOffsets().get(r);
|
||||
int quality = read.getBaseQualities()[offset];
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
|||
int altCount = 0;
|
||||
int totalCount = 0;
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||
|
||||
for (byte base : pileup.getBases() ) {
|
||||
|
|
@ -108,7 +108,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
|||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||
String colName = String.format("%s.%d", context.getContig(), context.getPosition());
|
||||
|
||||
for (int i = 0; i < context.numReads(); i++) {
|
||||
for (int i = 0; i < context.size(); i++) {
|
||||
SAMRecord read = context.getReads().get(i);
|
||||
int offset = context.getOffsets().get(i);
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
|||
public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets());
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
public double calculatePowerAtFrequency( AlignmentContext context, int alleles ) {
|
||||
return theoreticalPower( context.numReads(), getMeanQ(context), alleles, lodThresh );
|
||||
return theoreticalPower( context.size(), getMeanQ(context), alleles, lodThresh );
|
||||
}
|
||||
|
||||
public byte getMeanQ( AlignmentContext context ) {
|
||||
|
|
@ -126,7 +126,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
public double expectedMatchRate(AlignmentContext context) {
|
||||
int nReads = context.numReads();
|
||||
int nReads = context.size();
|
||||
double matches = 0.0;
|
||||
for ( int r = 0; r < nReads; r ++ ) {
|
||||
matches += QualityUtils.qualToProb(context.getReads().get(r).getBaseQualities()[context.getOffsets().get(r)]);
|
||||
|
|
|
|||
|
|
@ -18,8 +18,11 @@ import java.util.Random;
|
|||
abstract public class BasicPileup {
|
||||
public static final char DELETION_CHAR = 'D';
|
||||
|
||||
@Deprecated
|
||||
abstract GenomeLoc getLocation();
|
||||
@Deprecated
|
||||
abstract char getRef();
|
||||
@Deprecated
|
||||
abstract int size();
|
||||
|
||||
|
||||
|
|
@ -28,6 +31,7 @@ abstract public class BasicPileup {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
byte[] getBases() { return null; }
|
||||
|
||||
/**
|
||||
|
|
@ -35,6 +39,7 @@ abstract public class BasicPileup {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Deprecated
|
||||
byte[] getQuals() { return null; }
|
||||
|
||||
/**
|
||||
|
|
@ -57,50 +62,6 @@ abstract public class BasicPileup {
|
|||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
||||
}
|
||||
|
||||
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
StringBuilder bases = new StringBuilder();
|
||||
for ( byte base : getBasesAsArrayList(reads, offsets)) {
|
||||
bases.append((char)base);
|
||||
}
|
||||
return bases.toString();
|
||||
}
|
||||
|
||||
public static String baseWithStrandPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
StringBuilder bases = new StringBuilder();
|
||||
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
||||
char base;
|
||||
if ( offset == -1 ) {
|
||||
base = DELETION_CHAR;
|
||||
} else {
|
||||
base = (char) read.getReadBases()[offset];
|
||||
}
|
||||
|
||||
base = Character.toUpperCase(base);
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
base = Character.toLowerCase(base);
|
||||
}
|
||||
|
||||
bases.append(base);
|
||||
}
|
||||
|
||||
return bases.toString();
|
||||
}
|
||||
|
||||
//
|
||||
// byte[] methods
|
||||
//
|
||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return getBasesAsArray(reads,offsets);
|
||||
}
|
||||
|
||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return getQualsAsArray( reads, offsets );
|
||||
}
|
||||
|
||||
//
|
||||
// ArrayList<Byte> methods
|
||||
//
|
||||
|
|
@ -119,7 +80,7 @@ abstract public class BasicPileup {
|
|||
return array;
|
||||
}
|
||||
|
||||
|
||||
@Deprecated
|
||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||
for (byte value : getBasesAsArray(reads, offsets))
|
||||
|
|
@ -127,6 +88,7 @@ abstract public class BasicPileup {
|
|||
return bases;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for (byte value : getQualsAsArray(reads, offsets))
|
||||
|
|
@ -134,6 +96,7 @@ abstract public class BasicPileup {
|
|||
return quals;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
byte array[] = new byte[reads.size()];
|
||||
int index = 0;
|
||||
|
|
@ -151,6 +114,7 @@ abstract public class BasicPileup {
|
|||
return array;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
|
|
@ -161,73 +125,6 @@ abstract public class BasicPileup {
|
|||
return quals;
|
||||
}
|
||||
|
||||
public static String mappingQualPileupAsString( List<SAMRecord> reads) {
|
||||
return quals2String(mappingQualPileup(reads));
|
||||
}
|
||||
|
||||
public static String quals2String( List<Byte> quals ) {
|
||||
StringBuilder qualStr = new StringBuilder();
|
||||
for ( int qual : quals ) {
|
||||
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
|
||||
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
|
||||
qualStr.append(qualChar);
|
||||
}
|
||||
|
||||
return qualStr.toString();
|
||||
}
|
||||
|
||||
public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
return quals2String(getQualsAsArrayList(reads, offsets));
|
||||
}
|
||||
|
||||
|
||||
public static ArrayList<Byte> getSecondaryBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
ArrayList<Byte> bases2 = new ArrayList<Byte>(reads.size());
|
||||
boolean hasAtLeastOneSQorE2Field = false;
|
||||
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
byte base2 = BaseUtils.getSecondBase(read, offset);
|
||||
hasAtLeastOneSQorE2Field = hasAtLeastOneSQorE2Field || BaseUtils.simpleBaseToBaseIndex((char)base2) != -1;
|
||||
bases2.add(base2);
|
||||
}
|
||||
|
||||
return (hasAtLeastOneSQorE2Field ? bases2 : null);
|
||||
}
|
||||
|
||||
public static String getSecondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
StringBuilder bases2 = new StringBuilder();
|
||||
ArrayList<Byte> sbases = getSecondaryBasesAsArrayList(reads, offsets);
|
||||
|
||||
if (sbases == null) { return null; }
|
||||
|
||||
ArrayList<Byte> pbases = getBasesAsArrayList(reads, offsets);
|
||||
|
||||
//Random generator = new Random();
|
||||
|
||||
if ( sbases.size() != pbases.size() ) {
|
||||
throw new StingException("BUG in conversion of secondary bases: primary and secondary base vectors are different sizes!");
|
||||
}
|
||||
|
||||
for (int pileupIndex = 0; pileupIndex < sbases.size(); pileupIndex++) {
|
||||
byte pbase = pbases.get(pileupIndex);
|
||||
byte sbase = sbases.get(pileupIndex);
|
||||
|
||||
if ( sbase == pbase ) {
|
||||
throw new StingException("BUG in conversion of secondary bases!");
|
||||
}
|
||||
|
||||
// while (sbase == pbase) { // TODO why is here?
|
||||
// sbase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
|
||||
// }
|
||||
|
||||
bases2.append((char) sbase);
|
||||
}
|
||||
|
||||
return bases2.toString();
|
||||
}
|
||||
|
||||
@Deprecated // todo -- delete me
|
||||
public static String[] indelPileup( List<SAMRecord> reads, List<Integer> offsets )
|
||||
{
|
||||
|
|
|
|||
|
|
@ -8,9 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
|
@ -21,26 +19,74 @@ import net.sf.samtools.SAMRecord;
|
|||
*/
|
||||
public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||
private GenomeLoc loc = null;
|
||||
private char ref = 0;
|
||||
private ArrayList<PileupElement> pileup = null;
|
||||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int[] counts = new int[4]; // cached value of counts
|
||||
|
||||
public ReadBackedPileup(char ref, AlignmentContext context ) {
|
||||
this(context.getLocation(), ref, context.getReads(), context.getOffsets());
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This pileup will contain a list, in order of the reads, of the piled bases at
|
||||
* reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
|
||||
* go changing the reads.
|
||||
*
|
||||
* @param loc
|
||||
* @param reads
|
||||
* @param offsets
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
this(loc, readsOffsets2Pileup(reads, offsets));
|
||||
}
|
||||
|
||||
public ReadBackedPileup(GenomeLoc loc, char ref, List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
this(loc, ref, readsOffsets2Pileup(reads, offsets));
|
||||
}
|
||||
|
||||
public ReadBackedPileup(GenomeLoc loc, char ref, ArrayList<PileupElement> pileup ) {
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
|
||||
* pointer to pileup. Don't go changing the data in pileup.
|
||||
*
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
||||
|
||||
this.loc = loc;
|
||||
this.ref = ref;
|
||||
this.pileup = pileup;
|
||||
|
||||
calculatedCachedData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||
* sizes, nDeletion, etc. over and over potentially.
|
||||
*/
|
||||
private void calculatedCachedData() {
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0;
|
||||
|
||||
for ( PileupElement p : this ) {
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
nDeletions++;
|
||||
} else {
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
|
||||
if (index == -1)
|
||||
continue;
|
||||
counts[index]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Helper routine for converting reads and offset lists to a PileupElement list.
|
||||
*
|
||||
* @param reads
|
||||
* @param offsets
|
||||
* @return
|
||||
*/
|
||||
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
|
||||
if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2");
|
||||
|
|
@ -54,25 +100,19 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
return pileup;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
// Special 'constructors'
|
||||
//
|
||||
public Iterator<PileupElement> iterator() {
|
||||
return pileup.iterator();
|
||||
}
|
||||
|
||||
// todo -- reimplement efficiently
|
||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
||||
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
||||
int i = 0;
|
||||
for ( PileupElement pile : this ) {
|
||||
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
||||
}
|
||||
|
||||
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no deletions in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getPileupWithoutDeletions() {
|
||||
// todo -- fixme
|
||||
if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions
|
||||
|
|
@ -85,80 +125,144 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
newOffsets.add(getOffsets().get(i));
|
||||
}
|
||||
}
|
||||
return new ReadBackedPileup(loc, ref, newReads, newOffsets);
|
||||
return new ReadBackedPileup(loc, newReads, newOffsets);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public int getNumberOfDeletions() {
|
||||
int n = 0;
|
||||
/**
|
||||
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||
*
|
||||
* @param desiredCoverage
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
return this;
|
||||
|
||||
for ( int i = 0; i < size(); i++ ) {
|
||||
if ( getOffsets().get(i) != -1 ) { n++; }
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
Random generator = new Random();
|
||||
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||
if ( positions.add(generator.nextInt(pileup.size())) )
|
||||
i++;
|
||||
}
|
||||
|
||||
return n;
|
||||
Iterator positionIter = positions.iterator();
|
||||
ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>();
|
||||
|
||||
while ( positionIter.hasNext() ) {
|
||||
int nextReadToKeep = (Integer)positionIter.next();
|
||||
downsampledPileup.add(pileup.get(nextReadToKeep));
|
||||
}
|
||||
|
||||
return new ReadBackedPileup(getLocation(), downsampledPileup);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// iterators
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
|
||||
*
|
||||
* for (PileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Iterator<PileupElement> iterator() {
|
||||
return pileup.iterator();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The best way to access PileupElements where you only care not only about bases and quals in the pileup
|
||||
* but also need access to the index of the pileup element in the pile.
|
||||
*
|
||||
* for (ExtendedPileupElement p : this) { doSomething(p); }
|
||||
*
|
||||
* Provides efficient iteration of the data.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
|
||||
// todo -- reimplement efficiently
|
||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
||||
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
||||
int i = 0;
|
||||
for ( PileupElement pile : this ) {
|
||||
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
||||
}
|
||||
|
||||
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple useful routine to count the number of deletion bases in this pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getNumberOfDeletions() {
|
||||
return nDeletions;
|
||||
}
|
||||
|
||||
// public int getNumberOfDeletions() {
|
||||
// int n = 0;
|
||||
//
|
||||
// for ( int i = 0; i < size(); i++ ) {
|
||||
// if ( getOffsets().get(i) != -1 ) { n++; }
|
||||
// }
|
||||
//
|
||||
// return n;
|
||||
// }
|
||||
|
||||
// todo -- optimize me
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
public int size() {
|
||||
return pileup.size();
|
||||
return size;
|
||||
//return pileup.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
public char getRef() {
|
||||
return ref;
|
||||
}
|
||||
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
||||
public byte[] getBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
public byte[] getSecondaryBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
public byte[] getQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
|
||||
* to BaseUtils.simpleBaseToBaseIndex for each base.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int[] getBaseCounts() {
|
||||
int[] counts = new int[4];
|
||||
for ( PileupElement pile : this ) {
|
||||
// skip deletion sites
|
||||
if ( ! pile.isDeletion() ) {
|
||||
char base = Character.toUpperCase((char)(pile.getBase()));
|
||||
if (BaseUtils.simpleBaseToBaseIndex(base) == -1)
|
||||
continue;
|
||||
counts[BaseUtils.simpleBaseToBaseIndex(base)]++;
|
||||
}
|
||||
}
|
||||
|
||||
return counts;
|
||||
// int[] counts = new int[4];
|
||||
// for ( PileupElement pile : this ) {
|
||||
// // skip deletion sites
|
||||
// if ( ! pile.isDeletion() ) {
|
||||
// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||
// if (index == -1)
|
||||
// continue;
|
||||
// counts[index]++;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return counts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
|
||||
* @return
|
||||
*/
|
||||
public boolean hasSecondaryBases() {
|
||||
for ( PileupElement pile : this ) {
|
||||
// skip deletion sites
|
||||
|
|
@ -169,14 +273,69 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
return false;
|
||||
}
|
||||
|
||||
public String getPileupString(boolean qualsAsInts) {
|
||||
public String getPileupString(char ref, boolean qualsAsInts) {
|
||||
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
|
||||
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
|
||||
|
||||
//return String.format("%s %s %s %s", getLocation(), getRef(), getBases(), getQuals());
|
||||
return String.format("%s %s %s %s",
|
||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||
getRef(), // reference base
|
||||
ref, // reference base
|
||||
new String(getBases()));
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
// Convenience functions that may be slow
|
||||
//
|
||||
// --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getSecondaryBases() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
|
||||
* @return
|
||||
*/
|
||||
public byte[] getQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue