ReadBackedPileup in all its glory. Documented, aligned with the output of LocusIteratorByState, and caching common outputs for performance
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2165 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b44363d20a
commit
db40e28e54
|
|
@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.contexts;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -42,8 +44,7 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class AlignmentContext {
|
public class AlignmentContext {
|
||||||
protected GenomeLoc loc = null;
|
protected GenomeLoc loc = null;
|
||||||
protected List<SAMRecord> reads = null;
|
protected ReadBackedPileup pileup = null;
|
||||||
protected List<Integer> offsets = null;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The number of bases we've skipped over in the reference since the last map invocation.
|
* The number of bases we've skipped over in the reference since the last map invocation.
|
||||||
|
|
@ -65,31 +66,45 @@ public class AlignmentContext {
|
||||||
* @param reads
|
* @param reads
|
||||||
* @param offsets
|
* @param offsets
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
|
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
|
||||||
//assert loc != null;
|
this(loc, reads, offsets, 0);
|
||||||
//assert loc.getContig() != null;
|
|
||||||
//assert reads != null;
|
|
||||||
//assert offsets != null;
|
|
||||||
|
|
||||||
this.loc = loc;
|
|
||||||
this.reads = reads;
|
|
||||||
this.offsets = offsets;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets, long skippedBases ) {
|
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets, long skippedBases ) {
|
||||||
|
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
|
||||||
|
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
|
||||||
|
|
||||||
this.loc = loc;
|
this.loc = loc;
|
||||||
this.reads = reads;
|
this.pileup = new ReadBackedPileup(loc, reads, offsets);
|
||||||
this.offsets = offsets;
|
|
||||||
this.skippedBases = skippedBases;
|
this.skippedBases = skippedBases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup ) {
|
||||||
|
this(loc, pileup, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public AlignmentContext(GenomeLoc loc, ReadBackedPileup pileup, long skippedBases ) {
|
||||||
|
if ( loc == null ) throw new StingException("BUG: GenomeLoc in Alignment context is null");
|
||||||
|
if ( pileup == null ) throw new StingException("BUG: ReadBackedPileup in Alignment context is null");
|
||||||
|
if ( skippedBases < 0 ) throw new StingException("BUG: skippedBases is -1 in Alignment context");
|
||||||
|
|
||||||
|
this.loc = loc;
|
||||||
|
this.pileup = pileup;
|
||||||
|
this.skippedBases = skippedBases;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReadBackedPileup getPileup() { return pileup; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get all of the reads within this context
|
* get all of the reads within this context
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public List<SAMRecord> getReads() { return reads; }
|
@Deprecated
|
||||||
|
public List<SAMRecord> getReads() { return pileup.getReads(); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Are there any reads associated with this locus?
|
* Are there any reads associated with this locus?
|
||||||
|
|
@ -97,16 +112,15 @@ public class AlignmentContext {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public boolean hasReads() {
|
public boolean hasReads() {
|
||||||
return reads != null;
|
return pileup.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* How many reads cover this locus?
|
* How many reads cover this locus?
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public int numReads() {
|
public int size() {
|
||||||
assert( reads != null );
|
return pileup.size();
|
||||||
return reads.size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -114,89 +128,17 @@ public class AlignmentContext {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public List<Integer> getOffsets() {
|
public List<Integer> getOffsets() {
|
||||||
return offsets;
|
return pileup.getOffsets();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getContig() { return getLocation().getContig(); }
|
public String getContig() { return getLocation().getContig(); }
|
||||||
public long getPosition() { return getLocation().getStart(); }
|
public long getPosition() { return getLocation().getStart(); }
|
||||||
public GenomeLoc getLocation() { return loc; }
|
public GenomeLoc getLocation() { return loc; }
|
||||||
|
|
||||||
//public void setLocation(GenomeLoc loc) {
|
|
||||||
// this.loc = loc.clone();
|
|
||||||
//}
|
|
||||||
|
|
||||||
public void downsampleToCoverage(int coverage) {
|
public void downsampleToCoverage(int coverage) {
|
||||||
if ( numReads() <= coverage )
|
pileup = pileup.getDownsampledPileup(coverage);
|
||||||
return;
|
|
||||||
|
|
||||||
// randomly choose numbers corresponding to positions in the reads list
|
|
||||||
Random generator = new Random();
|
|
||||||
TreeSet positions = new TreeSet();
|
|
||||||
int i = 0;
|
|
||||||
while ( i < coverage ) {
|
|
||||||
if (positions.add(new Integer(generator.nextInt(reads.size()))))
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
|
|
||||||
ArrayList<SAMRecord> downsampledReads = new ArrayList<SAMRecord>();
|
|
||||||
ArrayList<Integer> downsampledOffsets = new ArrayList<Integer>();
|
|
||||||
Iterator positionIter = positions.iterator();
|
|
||||||
Iterator<SAMRecord> readsIter = reads.iterator();
|
|
||||||
Iterator<Integer> offsetsIter = offsets.iterator();
|
|
||||||
int currentRead = 0;
|
|
||||||
while ( positionIter.hasNext() ) {
|
|
||||||
int nextReadToKeep = (Integer)positionIter.next();
|
|
||||||
|
|
||||||
// fast-forward to the right read
|
|
||||||
while ( currentRead < nextReadToKeep ) {
|
|
||||||
readsIter.next();
|
|
||||||
offsetsIter.next();
|
|
||||||
currentRead++;
|
|
||||||
}
|
|
||||||
|
|
||||||
downsampledReads.add(readsIter.next());
|
|
||||||
downsampledOffsets.add(offsetsIter.next());
|
|
||||||
currentRead++;
|
|
||||||
}
|
|
||||||
|
|
||||||
reads = downsampledReads;
|
|
||||||
offsets = downsampledOffsets;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns only the reads in ac that do not contain spanning deletions of this locus
|
|
||||||
*
|
|
||||||
* @param ac
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) {
|
|
||||||
return subsetDeletions( ac, true );
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns only the reads in ac that do contain spanning deletions of this locus
|
|
||||||
*
|
|
||||||
* @param ac
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) {
|
|
||||||
return subsetDeletions( ac, false );
|
|
||||||
}
|
|
||||||
|
|
||||||
private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) {
|
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(ac.getReads().size());
|
|
||||||
ArrayList<Integer> offsets = new ArrayList<Integer>(ac.getReads().size());
|
|
||||||
for ( int i = 0; i < ac.getReads().size(); i++ ) {
|
|
||||||
SAMRecord read = ac.getReads().get(i);
|
|
||||||
int offset = ac.getOffsets().get(i);
|
|
||||||
if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) {
|
|
||||||
reads.add(read);
|
|
||||||
offsets.add(offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new AlignmentContext(ac.getLocation(), reads, offsets);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -217,8 +219,7 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// printState();
|
// printState();
|
||||||
//}
|
//}
|
||||||
|
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
|
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
|
||||||
ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
|
|
||||||
|
|
||||||
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
// keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
||||||
// (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
// (i.e. a real base, or, when includeReadsWithDeletionAtLoci() is true, either a real base or a deletion)
|
||||||
|
|
@ -229,11 +230,9 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
for ( SAMRecordState state : readStates ) {
|
for ( SAMRecordState state : readStates ) {
|
||||||
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
||||||
reads.add(state.getRead());
|
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||||
offsets.add(state.getReadOffset());
|
|
||||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
reads.add(state.getRead());
|
pile.add(new PileupElement(state.getRead(), -1));
|
||||||
offsets.add(-1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GenomeLoc loc = getLocation();
|
GenomeLoc loc = getLocation();
|
||||||
|
|
@ -245,10 +244,49 @@ public class LocusIteratorByState extends LocusIterator {
|
||||||
// printState();
|
// printState();
|
||||||
//}
|
//}
|
||||||
// if we got reads with non-D/N over the current position, we are done
|
// if we got reads with non-D/N over the current position, we are done
|
||||||
if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets);
|
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// old implementation -- uses lists of reads and offsets
|
||||||
|
// public AlignmentContext next() {
|
||||||
|
// //if (DEBUG) {
|
||||||
|
// // logger.debug("in Next:");
|
||||||
|
// // printState();
|
||||||
|
// //}
|
||||||
|
//
|
||||||
|
// ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(readStates.size());
|
||||||
|
// ArrayList<Integer> offsets = new ArrayList<Integer>(readStates.size());
|
||||||
|
//
|
||||||
|
// // keep iterating forward until we encounter a reference position that has something "real" hanging over it
|
||||||
|
// // (i.e. either a real base, or a real base or a deletion if includeReadsWithDeletion is true)
|
||||||
|
// while(true) {
|
||||||
|
// collectPendingReads(readInfo.getMaxReadsAtLocus());
|
||||||
|
//
|
||||||
|
// // todo -- performance problem -- should be lazy, really
|
||||||
|
// for ( SAMRecordState state : readStates ) {
|
||||||
|
// if ( state.getCurrentCigarOperator() != CigarOperator.D && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
//// System.out.println("Location: "+getLocation()+"; Read "+state.getRead().getReadName()+"; offset="+state.getReadOffset());
|
||||||
|
// reads.add(state.getRead());
|
||||||
|
// offsets.add(state.getReadOffset());
|
||||||
|
// } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||||
|
// reads.add(state.getRead());
|
||||||
|
// offsets.add(-1);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// GenomeLoc loc = getLocation();
|
||||||
|
//
|
||||||
|
// updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||||
|
//
|
||||||
|
// //if (DEBUG) {
|
||||||
|
// // logger.debug("DONE WITH NEXT, updating read states, current state is:");
|
||||||
|
// // printState();
|
||||||
|
// //}
|
||||||
|
// // if we got reads with non-D/N over the current position, we are done
|
||||||
|
// if ( reads.size() != 0 ) return new AlignmentContext(loc, reads, offsets);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
private void collectPendingReads(final int maximumPileupSize) {
|
private void collectPendingReads(final int maximumPileupSize) {
|
||||||
//if (DEBUG) {
|
//if (DEBUG) {
|
||||||
// logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext()));
|
// logger.debug(String.format("entering collectPendingReads..., hasNext=%b", it.hasNext()));
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,10 @@ import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -40,32 +42,32 @@ public class TraverseLocusWindows extends TraversalEngine {
|
||||||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||||
ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
ReferenceOrderedView referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||||
|
|
||||||
AlignmentContext locus = getLocusContext(readView.iterator(), interval);
|
Pair<GenomeLoc, List<SAMRecord>> locus = getLocusContext(readView.iterator(), interval);
|
||||||
|
|
||||||
// The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way.
|
// The TraverseByLocusWindow expands intervals to cover all reads in a non-standard way.
|
||||||
// TODO: Convert this approach to the standard.
|
// TODO: Convert this approach to the standard.
|
||||||
GenomeLoc expandedInterval = locus.getLocation();
|
GenomeLoc expandedInterval = locus.getFirst();
|
||||||
|
|
||||||
String referenceSubsequence = new String(referenceView.getReferenceBases(expandedInterval));
|
String referenceSubsequence = new String(referenceView.getReferenceBases(expandedInterval));
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this interval
|
// Iterate forward to get all reference ordered data covering this interval
|
||||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getFirst());
|
||||||
|
|
||||||
//
|
//
|
||||||
// Execute our contract with the walker. Call filter, map, and reduce
|
// Execute our contract with the walker. Call filter, map, and reduce
|
||||||
//
|
//
|
||||||
final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
|
//final boolean keepMeP = locusWindowWalker.filter(tracker, referenceSubsequence, locus);
|
||||||
if (keepMeP) {
|
//if (keepMeP) {
|
||||||
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus);
|
M x = locusWindowWalker.map(tracker, referenceSubsequence, locus.getFirst(), locus.getSecond());
|
||||||
sum = locusWindowWalker.reduce(x, sum);
|
sum = locusWindowWalker.reduce(x, sum);
|
||||||
}
|
//}
|
||||||
|
|
||||||
printProgress(LOCUS_WINDOW_STRING, locus.getLocation());
|
printProgress(LOCUS_WINDOW_STRING, locus.getFirst());
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
private AlignmentContext getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
|
private Pair<GenomeLoc, List<SAMRecord>> getLocusContext(StingSAMIterator readIter, GenomeLoc interval) {
|
||||||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
long leftmostIndex = interval.getStart(),
|
long leftmostIndex = interval.getStart(),
|
||||||
|
|
@ -86,11 +88,11 @@ public class TraverseLocusWindows extends TraversalEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||||
AlignmentContext locus = new AlignmentContext(window, reads, null);
|
// AlignmentContext locus = new AlignmentContext(window, reads, null);
|
||||||
if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
|
// if ( readIter.getSourceInfo().getDownsampleToCoverage() != null )
|
||||||
locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage());
|
// locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage());
|
||||||
|
|
||||||
return locus;
|
return new Pair<GenomeLoc, List<SAMRecord>>(window, reads);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,10 @@ package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -12,13 +16,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
*/
|
*/
|
||||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||||
public abstract class LocusWindowWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class LocusWindowWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
|
||||||
public boolean filter(RefMetaDataTracker tracker, String ref, AlignmentContext context) {
|
|
||||||
return true; // We are keeping all the intervals
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
// Map over a locus window: its location and the list of reads gathered for it
|
||||||
public abstract MapType map(RefMetaDataTracker tracker, String ref, AlignmentContext context);
|
public abstract MapType map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List<SAMRecord> reads);
|
||||||
|
|
||||||
// Given result of map function
|
// Given result of map function
|
||||||
public abstract ReduceType reduceInit();
|
public abstract ReduceType reduceInit();
|
||||||
|
|
|
||||||
|
|
@ -66,14 +66,14 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
|
|
||||||
String secondBasePileup = "";
|
String secondBasePileup = "";
|
||||||
if(shouldShowSecondaryBasePileup(pileup))
|
if(shouldShowSecondaryBasePileup(pileup))
|
||||||
secondBasePileup = getSecondBasePileup(pileup);
|
secondBasePileup = getSecondBasePileup(pileup);
|
||||||
String rods = getReferenceOrderedData( tracker );
|
String rods = getReferenceOrderedData( tracker );
|
||||||
|
|
||||||
out.printf("%s%s %s%n", pileup.getPileupString(qualsAsInts), secondBasePileup, rods);
|
out.printf("%s%s %s%n", pileup.getPileupString(ref.getBase(), qualsAsInts), secondBasePileup, rods);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,11 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
||||||
public boolean CONTINUE_AFTER_AN_ERROR = false;
|
public boolean CONTINUE_AFTER_AN_ERROR = false;
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
SAMPileupRecord truePileup = getTruePileup( tracker );
|
SAMPileupRecord truePileup = getTruePileup( tracker );
|
||||||
|
|
||||||
if ( truePileup == null ) {
|
if ( truePileup == null ) {
|
||||||
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(false));
|
out.printf("No truth pileup data available at %s%n", pileup.getPileupString(ref.getBase(), false));
|
||||||
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
||||||
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
|
Utils.scareUser(String.format("No pileup data available at %s given GATK's output of %s -- this walker requires samtools pileup data over all bases",
|
||||||
context.getLocation(), new String(pileup.getBases())));
|
context.getLocation(), new String(pileup.getBases())));
|
||||||
|
|
@ -37,7 +37,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
||||||
} else {
|
} else {
|
||||||
String pileupDiff = pileupDiff(pileup, truePileup, true);
|
String pileupDiff = pileupDiff(pileup, truePileup, true);
|
||||||
if ( pileupDiff != null ) {
|
if ( pileupDiff != null ) {
|
||||||
out.printf("%s vs. %s%n", pileup.getPileupString(true), truePileup.getPileupString());
|
out.printf("%s vs. %s%n", pileup.getPileupString(ref.getBase(), true), truePileup.getPileupString());
|
||||||
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
if ( ! CONTINUE_AFTER_AN_ERROR ) {
|
||||||
throw new RuntimeException(String.format("Pileups aren't equal: %s", pileupDiff));
|
throw new RuntimeException(String.format("Pileups aren't equal: %s", pileupDiff));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ public class SecondBaseSkew implements VariantAnnotation {
|
||||||
|
|
||||||
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileupWithDel, Variation variation, List<Genotype> genotypes) {
|
public Pair<String, String> annotate(ReferenceContext ref, ReadBackedPileup pileupWithDel, Variation variation, List<Genotype> genotypes) {
|
||||||
ReadBackedPileup pileup = pileupWithDel; // .getPileupWithoutDeletions();
|
ReadBackedPileup pileup = pileupWithDel; // .getPileupWithoutDeletions();
|
||||||
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(pileup,genotypes);
|
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup,genotypes);
|
||||||
if ( depthProp == null ) {
|
if ( depthProp == null ) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -48,10 +48,10 @@ public class SecondBaseSkew implements VariantAnnotation {
|
||||||
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
|
return proportion / ( Math.sqrt ( proportion*(1-proportion)/depth ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(ReadBackedPileup p, List<Genotype> genotypes) {
|
private Pair<Integer, Double> getSecondaryPileupNonrefEstimator(char ref, ReadBackedPileup p, List<Genotype> genotypes) {
|
||||||
char snp;
|
char snp;
|
||||||
try {
|
try {
|
||||||
snp = getNonref(genotypes, p.getRef());
|
snp = getNonref(genotypes, ref);
|
||||||
} catch ( IllegalStateException e ) {
|
} catch ( IllegalStateException e ) {
|
||||||
// tri-allelic site
|
// tri-allelic site
|
||||||
// System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")");
|
// System.out.println("Illegal State Exception caught at "+p.getLocation().toString()+" 2bb skew annotation suppressed ("+e.getLocalizedMessage()+")");
|
||||||
|
|
@ -67,7 +67,7 @@ public class SecondBaseSkew implements VariantAnnotation {
|
||||||
|
|
||||||
if ( BaseUtils.isRegularBase((char)sbase) && BaseUtils.basesAreEqual(pbase, (byte) snp) ) {
|
if ( BaseUtils.isRegularBase((char)sbase) && BaseUtils.basesAreEqual(pbase, (byte) snp) ) {
|
||||||
variantDepth++;
|
variantDepth++;
|
||||||
if ( BaseUtils.basesAreEqual(sbase, (byte)p.getRef()) ) {
|
if ( BaseUtils.basesAreEqual(sbase, (byte)ref) ) {
|
||||||
variantsWithRefSecondBase++;
|
variantsWithRefSecondBase++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -187,8 +187,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
||||||
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List<Genotype> genotypes, Collection<VariantAnnotation> annotations) {
|
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List<Genotype> genotypes, Collection<VariantAnnotation> annotations) {
|
||||||
|
|
||||||
// set up the pileup for the full collection of reads at this position
|
// set up the pileup for the full collection of reads at this position
|
||||||
ReadBackedPileup fullPileup = new ReadBackedPileup(ref.getBase(), context);
|
ReadBackedPileup fullPileup = context.getPileup();
|
||||||
|
|
||||||
|
// todo -- reimplement directly using ReadBackedPileups, which is vastly more efficient
|
||||||
// also, set up the pileup for the mapping-quality-zero-free context
|
// also, set up the pileup for the mapping-quality-zero-free context
|
||||||
List<SAMRecord> reads = context.getReads();
|
List<SAMRecord> reads = context.getReads();
|
||||||
List<Integer> offsets = context.getOffsets();
|
List<Integer> offsets = context.getOffsets();
|
||||||
|
|
@ -204,7 +205,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
||||||
MQ0freeOffsets.add(offset);
|
MQ0freeOffsets.add(offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), MQ0freeReads, MQ0freeOffsets);
|
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), MQ0freeReads, MQ0freeOffsets);
|
||||||
|
|
||||||
HashMap<String, String> results = new HashMap<String, String>();
|
HashMap<String, String> results = new HashMap<String, String>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for ( String sample : contexts.keySet() ) {
|
for ( String sample : contexts.keySet() ) {
|
||||||
AlignmentContextBySample context = contexts.get(sample);
|
AlignmentContextBySample context = contexts.get(sample);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
|
|
@ -82,7 +82,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc);
|
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc);
|
||||||
|
|
||||||
if ( call instanceof ReadBacked ) {
|
if ( call instanceof ReadBacked ) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
|
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
|
||||||
((ReadBacked)call).setPileup(pileup);
|
((ReadBacked)call).setPileup(pileup);
|
||||||
}
|
}
|
||||||
if ( call instanceof SampleBacked ) {
|
if ( call instanceof SampleBacked ) {
|
||||||
|
|
|
||||||
|
|
@ -96,7 +96,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||||
|
|
||||||
if ( call instanceof ReadBacked ) {
|
if ( call instanceof ReadBacked ) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, contexts.get(sample).getContext(StratifiedContext.OVERALL));
|
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();
|
||||||
((ReadBacked)call).setPileup(pileup);
|
((ReadBacked)call).setPileup(pileup);
|
||||||
}
|
}
|
||||||
if ( call instanceof SampleBacked ) {
|
if ( call instanceof SampleBacked ) {
|
||||||
|
|
|
||||||
|
|
@ -108,7 +108,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(char ref, AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) {
|
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(char ref, AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) {
|
||||||
// create the pileup
|
// create the pileup
|
||||||
AlignmentContext myContext = sampleContext.getContext(contextType);
|
AlignmentContext myContext = sampleContext.getContext(contextType);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, myContext);
|
ReadBackedPileup pileup = myContext.getPileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
|
|
@ -132,7 +132,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
|
|
||||||
for ( String sample : contexts.keySet() ) {
|
for ( String sample : contexts.keySet() ) {
|
||||||
AlignmentContextBySample context = contexts.get(sample);
|
AlignmentContextBySample context = contexts.get(sample);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
|
||||||
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int nChromosomes, HashMap<String, AlignmentContextBySample> contexts, StratifiedContext contextType) {
|
||||||
|
|
||||||
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
AlignmentContextBySample context = contexts.get(POOL_SAMPLE_NAME);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context.getContext(contextType));
|
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||||
|
|
||||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref);
|
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref);
|
||||||
int altIndex = BaseUtils.simpleBaseToBaseIndex(alt);
|
int altIndex = BaseUtils.simpleBaseToBaseIndex(alt);
|
||||||
|
|
|
||||||
|
|
@ -106,8 +106,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, String ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, String ref, GenomeLoc loc, List<SAMRecord> reads) {
|
||||||
List<SAMRecord> reads = context.getReads();
|
|
||||||
ArrayList<SAMRecord> goodReads = new ArrayList<SAMRecord>();
|
ArrayList<SAMRecord> goodReads = new ArrayList<SAMRecord>();
|
||||||
for ( SAMRecord read : reads ) {
|
for ( SAMRecord read : reads ) {
|
||||||
if ( !read.getReadUnmappedFlag() &&
|
if ( !read.getReadUnmappedFlag() &&
|
||||||
|
|
@ -121,7 +120,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
readsToWrite.add(new ComparableSAMRecord(read));
|
readsToWrite.add(new ComparableSAMRecord(read));
|
||||||
}
|
}
|
||||||
|
|
||||||
clean(goodReads, ref, context.getLocation());
|
clean(goodReads, ref, loc);
|
||||||
//bruteForceClean(goodReads, ref, context.getLocation().getStart());
|
//bruteForceClean(goodReads, ref, context.getLocation().getStart());
|
||||||
//testCleanWithDeletion();
|
//testCleanWithDeletion();
|
||||||
//testCleanWithInsertion();
|
//testCleanWithInsertion();
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set<BaseTransitionTable> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
public Set<BaseTransitionTable> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(),context);
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
Set<BaseTransitionTable> newCounts = null;
|
Set<BaseTransitionTable> newCounts = null;
|
||||||
//System.out.println(pileup.getBases());
|
//System.out.println(pileup.getBases());
|
||||||
if ( baseIsUsable(tracker, ref, pileup, context) ) {
|
if ( baseIsUsable(tracker, ref, pileup, context) ) {
|
||||||
|
|
@ -279,7 +279,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( usePileupMismatches ) {
|
if ( usePileupMismatches ) {
|
||||||
conditions.add(countMismatches(pileup));
|
conditions.add(countMismatches(ref.getBase(), pileup));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( useReadGroup ) {
|
if ( useReadGroup ) {
|
||||||
|
|
@ -331,8 +331,8 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
||||||
return String.format("%s\t%s%n",header,"Counts");
|
return String.format("%s\t%s%n",header,"Counts");
|
||||||
}
|
}
|
||||||
|
|
||||||
public int countMismatches(ReadBackedPileup p) {
|
public int countMismatches(char ref, ReadBackedPileup p) {
|
||||||
int refM = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(p.getRef())];
|
int refM = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(ref)];
|
||||||
return p.size()-refM;
|
return p.size()-refM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -345,7 +345,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean pileupBelowMismatchThreshold( ReferenceContext ref, ReadBackedPileup pileup ) {
|
public boolean pileupBelowMismatchThreshold( ReferenceContext ref, ReadBackedPileup pileup ) {
|
||||||
return countMismatches(pileup) <= maxNumMismatches;
|
return countMismatches(ref.getBase(), pileup) <= maxNumMismatches;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean pileupContainsNoNs(ReadBackedPileup pileup) {
|
public boolean pileupContainsNoNs(ReadBackedPileup pileup) {
|
||||||
|
|
|
||||||
|
|
@ -318,7 +318,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
||||||
GenomeLoc Gloc = context.getLocation();
|
GenomeLoc Gloc = context.getLocation();
|
||||||
|
|
||||||
//Create pileup of reads at this locus
|
//Create pileup of reads at this locus
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
|
|
||||||
long loc = context.getPosition();
|
long loc = context.getPosition();
|
||||||
if( context.getReads().size() > 0 ) {
|
if( context.getReads().size() > 0 ) {
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
|
||||||
}
|
}
|
||||||
int numVariantAllele = alleleFreqInfo.getSecond().getFirst();
|
int numVariantAllele = alleleFreqInfo.getSecond().getFirst();
|
||||||
int numChipsObserved = alleleFreqInfo.getSecond().getSecond();
|
int numChipsObserved = alleleFreqInfo.getSecond().getSecond();
|
||||||
int depth = context.numReads();
|
int depth = context.size();
|
||||||
double power = powerWalker.calculatePowerAtFrequency(context,numVariantAllele);
|
double power = powerWalker.calculatePowerAtFrequency(context,numVariantAllele);
|
||||||
int called;
|
int called;
|
||||||
Variation call = (Variation) tracker.lookup("calls",null);
|
Variation call = (Variation) tracker.lookup("calls",null);
|
||||||
|
|
@ -89,7 +89,7 @@ public class HapmapPoolAllelicInfoWalker extends LocusWalker<String, PrintWriter
|
||||||
called = 1;
|
called = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ReadBackedPileup p = new ReadBackedPileup(ref.getBase(),context);
|
ReadBackedPileup p = context.getPileup();
|
||||||
int support = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(alternate)];
|
int support = p.getBaseCounts()[BaseUtils.simpleBaseToBaseIndex(alternate)];
|
||||||
|
|
||||||
// sanity check
|
// sanity check
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,7 @@ public class MinimumNQSWalker extends LocusWalker<Pair<List<Pair<Integer,Integer
|
||||||
ArrayList<Pair<Integer,Integer>> matchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
ArrayList<Pair<Integer,Integer>> matchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
||||||
ArrayList<Pair<Integer,Integer>> mismatchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
ArrayList<Pair<Integer,Integer>> mismatchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
||||||
if ( (Variation) tracker.lookup("dbsnp",null) == null ) {
|
if ( (Variation) tracker.lookup("dbsnp",null) == null ) {
|
||||||
for ( int r = 0; r < context.numReads(); r ++ ) {
|
for ( int r = 0; r < context.size(); r ++ ) {
|
||||||
SAMRecord read = context.getReads().get(r);
|
SAMRecord read = context.getReads().get(r);
|
||||||
int offset = context.getOffsets().get(r);
|
int offset = context.getOffsets().get(r);
|
||||||
int quality = read.getBaseQualities()[offset];
|
int quality = read.getBaseQualities()[offset];
|
||||||
|
|
|
||||||
|
|
@ -69,7 +69,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
||||||
int altCount = 0;
|
int altCount = 0;
|
||||||
int totalCount = 0;
|
int totalCount = 0;
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref.getBase(), context);
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||||
|
|
||||||
for (byte base : pileup.getBases() ) {
|
for (byte base : pileup.getBases() ) {
|
||||||
|
|
@ -108,7 +108,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
||||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||||
String colName = String.format("%s.%d", context.getContig(), context.getPosition());
|
String colName = String.format("%s.%d", context.getContig(), context.getPosition());
|
||||||
|
|
||||||
for (int i = 0; i < context.numReads(); i++) {
|
for (int i = 0; i < context.size(); i++) {
|
||||||
SAMRecord read = context.getReads().get(i);
|
SAMRecord read = context.getReads().get(i);
|
||||||
int offset = context.getOffsets().get(i);
|
int offset = context.getOffsets().get(i);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ public class GATKPaperGenotyper extends LocusWalker<SimpleCall, Integer> impleme
|
||||||
public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public SimpleCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
|
if (ref.getBase() == 'N' || ref.getBase() == 'n') return null; // we don't deal with the N ref base case
|
||||||
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(context.getLocation(), ref.getBase(), context.getReads(), context.getOffsets());
|
ReadBackedPileup pileup = context.getPileup();
|
||||||
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
double likelihoods[] = DiploidGenotypePriors.getReferencePolarizedPriors(ref.getBase(),
|
||||||
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
DiploidGenotypePriors.HUMAN_HETEROZYGOSITY,
|
||||||
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||||
|
|
|
||||||
|
|
@ -111,7 +111,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double calculatePowerAtFrequency( AlignmentContext context, int alleles ) {
|
public double calculatePowerAtFrequency( AlignmentContext context, int alleles ) {
|
||||||
return theoreticalPower( context.numReads(), getMeanQ(context), alleles, lodThresh );
|
return theoreticalPower( context.size(), getMeanQ(context), alleles, lodThresh );
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte getMeanQ( AlignmentContext context ) {
|
public byte getMeanQ( AlignmentContext context ) {
|
||||||
|
|
@ -126,7 +126,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double expectedMatchRate(AlignmentContext context) {
|
public double expectedMatchRate(AlignmentContext context) {
|
||||||
int nReads = context.numReads();
|
int nReads = context.size();
|
||||||
double matches = 0.0;
|
double matches = 0.0;
|
||||||
for ( int r = 0; r < nReads; r ++ ) {
|
for ( int r = 0; r < nReads; r ++ ) {
|
||||||
matches += QualityUtils.qualToProb(context.getReads().get(r).getBaseQualities()[context.getOffsets().get(r)]);
|
matches += QualityUtils.qualToProb(context.getReads().get(r).getBaseQualities()[context.getOffsets().get(r)]);
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,11 @@ import java.util.Random;
|
||||||
abstract public class BasicPileup {
|
abstract public class BasicPileup {
|
||||||
public static final char DELETION_CHAR = 'D';
|
public static final char DELETION_CHAR = 'D';
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
abstract GenomeLoc getLocation();
|
abstract GenomeLoc getLocation();
|
||||||
|
@Deprecated
|
||||||
abstract char getRef();
|
abstract char getRef();
|
||||||
|
@Deprecated
|
||||||
abstract int size();
|
abstract int size();
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -28,6 +31,7 @@ abstract public class BasicPileup {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
byte[] getBases() { return null; }
|
byte[] getBases() { return null; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -35,6 +39,7 @@ abstract public class BasicPileup {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
byte[] getQuals() { return null; }
|
byte[] getQuals() { return null; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -57,50 +62,6 @@ abstract public class BasicPileup {
|
||||||
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
return String.format("%s: %s %s %s", getLocation(), getRef(), getBasesAsString(), getQualsAsString());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String basePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
StringBuilder bases = new StringBuilder();
|
|
||||||
for ( byte base : getBasesAsArrayList(reads, offsets)) {
|
|
||||||
bases.append((char)base);
|
|
||||||
}
|
|
||||||
return bases.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String baseWithStrandPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
StringBuilder bases = new StringBuilder();
|
|
||||||
|
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
|
||||||
SAMRecord read = reads.get(i);
|
|
||||||
int offset = offsets.get(i);
|
|
||||||
|
|
||||||
char base;
|
|
||||||
if ( offset == -1 ) {
|
|
||||||
base = DELETION_CHAR;
|
|
||||||
} else {
|
|
||||||
base = (char) read.getReadBases()[offset];
|
|
||||||
}
|
|
||||||
|
|
||||||
base = Character.toUpperCase(base);
|
|
||||||
if (read.getReadNegativeStrandFlag()) {
|
|
||||||
base = Character.toLowerCase(base);
|
|
||||||
}
|
|
||||||
|
|
||||||
bases.append(base);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bases.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// byte[] methods
|
|
||||||
//
|
|
||||||
public static byte[] getBases( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
return getBasesAsArray(reads,offsets);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static byte[] getQuals( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
return getQualsAsArray( reads, offsets );
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// ArrayList<Byte> methods
|
// ArrayList<Byte> methods
|
||||||
//
|
//
|
||||||
|
|
@ -119,7 +80,7 @@ abstract public class BasicPileup {
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static ArrayList<Byte> getBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
ArrayList<Byte> bases = new ArrayList<Byte>(reads.size());
|
||||||
for (byte value : getBasesAsArray(reads, offsets))
|
for (byte value : getBasesAsArray(reads, offsets))
|
||||||
|
|
@ -127,6 +88,7 @@ abstract public class BasicPileup {
|
||||||
return bases;
|
return bases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static ArrayList<Byte> getQualsAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||||
for (byte value : getQualsAsArray(reads, offsets))
|
for (byte value : getQualsAsArray(reads, offsets))
|
||||||
|
|
@ -134,6 +96,7 @@ abstract public class BasicPileup {
|
||||||
return quals;
|
return quals;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
|
public static byte[] getQualsAsArray( List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
byte array[] = new byte[reads.size()];
|
byte array[] = new byte[reads.size()];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
@ -151,6 +114,7 @@ abstract public class BasicPileup {
|
||||||
return array;
|
return array;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
public static ArrayList<Byte> mappingQualPileup( List<SAMRecord> reads) {
|
||||||
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
ArrayList<Byte> quals = new ArrayList<Byte>(reads.size());
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for ( int i = 0; i < reads.size(); i++ ) {
|
||||||
|
|
@ -161,73 +125,6 @@ abstract public class BasicPileup {
|
||||||
return quals;
|
return quals;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String mappingQualPileupAsString( List<SAMRecord> reads) {
|
|
||||||
return quals2String(mappingQualPileup(reads));
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String quals2String( List<Byte> quals ) {
|
|
||||||
StringBuilder qualStr = new StringBuilder();
|
|
||||||
for ( int qual : quals ) {
|
|
||||||
qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
|
|
||||||
char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
|
|
||||||
qualStr.append(qualChar);
|
|
||||||
}
|
|
||||||
|
|
||||||
return qualStr.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String qualPileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
return quals2String(getQualsAsArrayList(reads, offsets));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static ArrayList<Byte> getSecondaryBasesAsArrayList( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
ArrayList<Byte> bases2 = new ArrayList<Byte>(reads.size());
|
|
||||||
boolean hasAtLeastOneSQorE2Field = false;
|
|
||||||
|
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
|
||||||
SAMRecord read = reads.get(i);
|
|
||||||
int offset = offsets.get(i);
|
|
||||||
byte base2 = BaseUtils.getSecondBase(read, offset);
|
|
||||||
hasAtLeastOneSQorE2Field = hasAtLeastOneSQorE2Field || BaseUtils.simpleBaseToBaseIndex((char)base2) != -1;
|
|
||||||
bases2.add(base2);
|
|
||||||
}
|
|
||||||
|
|
||||||
return (hasAtLeastOneSQorE2Field ? bases2 : null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getSecondaryBasePileupAsString( List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
StringBuilder bases2 = new StringBuilder();
|
|
||||||
ArrayList<Byte> sbases = getSecondaryBasesAsArrayList(reads, offsets);
|
|
||||||
|
|
||||||
if (sbases == null) { return null; }
|
|
||||||
|
|
||||||
ArrayList<Byte> pbases = getBasesAsArrayList(reads, offsets);
|
|
||||||
|
|
||||||
//Random generator = new Random();
|
|
||||||
|
|
||||||
if ( sbases.size() != pbases.size() ) {
|
|
||||||
throw new StingException("BUG in conversion of secondary bases: primary and secondary base vectors are different sizes!");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int pileupIndex = 0; pileupIndex < sbases.size(); pileupIndex++) {
|
|
||||||
byte pbase = pbases.get(pileupIndex);
|
|
||||||
byte sbase = sbases.get(pileupIndex);
|
|
||||||
|
|
||||||
if ( sbase == pbase ) {
|
|
||||||
throw new StingException("BUG in conversion of secondary bases!");
|
|
||||||
}
|
|
||||||
|
|
||||||
// while (sbase == pbase) { // TODO why is here?
|
|
||||||
// sbase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
|
|
||||||
// }
|
|
||||||
|
|
||||||
bases2.append((char) sbase);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bases2.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Deprecated // todo -- delete me
|
@Deprecated // todo -- delete me
|
||||||
public static String[] indelPileup( List<SAMRecord> reads, List<Integer> offsets )
|
public static String[] indelPileup( List<SAMRecord> reads, List<Integer> offsets )
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
@ -21,26 +19,74 @@ import net.sf.samtools.SAMRecord;
|
||||||
*/
|
*/
|
||||||
public class ReadBackedPileup implements Iterable<PileupElement> {
|
public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
private GenomeLoc loc = null;
|
private GenomeLoc loc = null;
|
||||||
private char ref = 0;
|
|
||||||
private ArrayList<PileupElement> pileup = null;
|
private ArrayList<PileupElement> pileup = null;
|
||||||
|
|
||||||
public ReadBackedPileup(char ref, AlignmentContext context ) {
|
private int size = 0; // cached value of the size of the pileup
|
||||||
this(context.getLocation(), ref, context.getReads(), context.getOffsets());
|
private int nDeletions = 0; // cached value of the number of deletions
|
||||||
|
private int[] counts = new int[4]; // cached value of counts
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||||
|
* offsets. This pileup will contain a list, in order of the reads, of the piled bases at
|
||||||
|
* reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
|
||||||
|
* go changing the reads.
|
||||||
|
*
|
||||||
|
* @param loc
|
||||||
|
* @param reads
|
||||||
|
* @param offsets
|
||||||
|
*/
|
||||||
|
public ReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
|
this(loc, readsOffsets2Pileup(reads, offsets));
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReadBackedPileup(GenomeLoc loc, char ref, List<SAMRecord> reads, List<Integer> offsets ) {
|
|
||||||
this(loc, ref, readsOffsets2Pileup(reads, offsets));
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReadBackedPileup(GenomeLoc loc, char ref, ArrayList<PileupElement> pileup ) {
|
/**
|
||||||
|
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||||
|
* offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
|
||||||
|
* pointer to pileup. Don't go changing the data in pileup.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup ) {
|
||||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
||||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
||||||
|
|
||||||
this.loc = loc;
|
this.loc = loc;
|
||||||
this.ref = ref;
|
|
||||||
this.pileup = pileup;
|
this.pileup = pileup;
|
||||||
|
|
||||||
|
calculatedCachedData();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
|
||||||
|
* so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
|
||||||
|
* sizes, nDeletion, etc. over and over potentially.
|
||||||
|
*/
|
||||||
|
private void calculatedCachedData() {
|
||||||
|
size = 0;
|
||||||
|
nDeletions = 0;
|
||||||
|
counts[0] = 0; counts[1] = 0; counts[2] = 0; counts[3] = 0;
|
||||||
|
|
||||||
|
for ( PileupElement p : this ) {
|
||||||
|
size++;
|
||||||
|
if ( p.isDeletion() ) {
|
||||||
|
nDeletions++;
|
||||||
|
} else {
|
||||||
|
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
|
||||||
|
if (index == -1)
|
||||||
|
continue;
|
||||||
|
counts[index]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper routine for converting reads and offset lists to a PileupElement list.
|
||||||
|
*
|
||||||
|
* @param reads
|
||||||
|
* @param offsets
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
|
private static ArrayList<PileupElement> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
|
||||||
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
|
if ( reads == null ) throw new StingException("Illegal null read list in ReadBackedPileup2");
|
||||||
if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2");
|
if ( offsets == null ) throw new StingException("Illegal null offsets list in ReadBackedPileup2");
|
||||||
|
|
@ -54,25 +100,19 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
return pileup;
|
return pileup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------
|
||||||
//
|
//
|
||||||
// iterators
|
// Special 'constructors'
|
||||||
//
|
//
|
||||||
public Iterator<PileupElement> iterator() {
|
// --------------------------------------------------------
|
||||||
return pileup.iterator();
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo -- reimplement efficiently
|
|
||||||
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
|
||||||
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
|
||||||
int i = 0;
|
|
||||||
for ( PileupElement pile : this ) {
|
|
||||||
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
|
||||||
}
|
|
||||||
|
|
||||||
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
|
||||||
|
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||||
|
* of the pileup (just returns this) if there are no deletions in the pileup.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
public ReadBackedPileup getPileupWithoutDeletions() {
|
public ReadBackedPileup getPileupWithoutDeletions() {
|
||||||
// todo -- fixme
|
// todo -- fixme
|
||||||
if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions
|
if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions
|
||||||
|
|
@ -85,80 +125,144 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
newOffsets.add(getOffsets().get(i));
|
newOffsets.add(getOffsets().get(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new ReadBackedPileup(loc, ref, newReads, newOffsets);
|
return new ReadBackedPileup(loc, newReads, newOffsets);
|
||||||
} else {
|
} else {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a pileup randomly downsampled to the desiredCoverage.
|
||||||
|
*
|
||||||
|
* @param desiredCoverage
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public ReadBackedPileup getDownsampledPileup(int desiredCoverage) {
|
||||||
|
if ( size() <= desiredCoverage )
|
||||||
|
return this;
|
||||||
|
|
||||||
|
// randomly choose numbers corresponding to positions in the reads list
|
||||||
|
Random generator = new Random();
|
||||||
|
TreeSet<Integer> positions = new TreeSet<Integer>();
|
||||||
|
for ( int i = 0; i < desiredCoverage; /* no update */ ) {
|
||||||
|
if ( positions.add(generator.nextInt(pileup.size())) )
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator positionIter = positions.iterator();
|
||||||
|
ArrayList<PileupElement> downsampledPileup = new ArrayList<PileupElement>();
|
||||||
|
|
||||||
|
while ( positionIter.hasNext() ) {
|
||||||
|
int nextReadToKeep = (Integer)positionIter.next();
|
||||||
|
downsampledPileup.add(pileup.get(nextReadToKeep));
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ReadBackedPileup(getLocation(), downsampledPileup);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------
|
||||||
|
//
|
||||||
|
// iterators
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The best way to access PileupElements where you only care about the bases and quals in the pileup.
|
||||||
|
*
|
||||||
|
* for (PileupElement p : this) { doSomething(p); }
|
||||||
|
*
|
||||||
|
* Provides efficient iteration of the data.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Iterator<PileupElement> iterator() {
|
||||||
|
return pileup.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The best way to access PileupElements where you only care not only about bases and quals in the pileup
|
||||||
|
* but also need access to the index of the pileup element in the pile.
|
||||||
|
*
|
||||||
|
* for (ExtendedPileupElement p : this) { doSomething(p); }
|
||||||
|
*
|
||||||
|
* Provides efficient iteration of the data.
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
|
||||||
|
// todo -- reimplement efficiently
|
||||||
|
public IterableIterator<ExtendedPileupElement> extendedForeachIterator() {
|
||||||
|
ArrayList<ExtendedPileupElement> x = new ArrayList<ExtendedPileupElement>(size());
|
||||||
|
int i = 0;
|
||||||
|
for ( PileupElement pile : this ) {
|
||||||
|
x.add(new ExtendedPileupElement(pile.getRead(), pile.getOffset(), i++, this));
|
||||||
|
}
|
||||||
|
|
||||||
|
return new IterableIterator<ExtendedPileupElement>(x.iterator());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple useful routine to count the number of deletion bases in this pileup
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
public int getNumberOfDeletions() {
|
public int getNumberOfDeletions() {
|
||||||
int n = 0;
|
return nDeletions;
|
||||||
|
|
||||||
for ( int i = 0; i < size(); i++ ) {
|
|
||||||
if ( getOffsets().get(i) != -1 ) { n++; }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return n;
|
// public int getNumberOfDeletions() {
|
||||||
}
|
// int n = 0;
|
||||||
|
//
|
||||||
|
// for ( int i = 0; i < size(); i++ ) {
|
||||||
|
// if ( getOffsets().get(i) != -1 ) { n++; }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return n;
|
||||||
|
// }
|
||||||
|
|
||||||
// todo -- optimize me
|
// todo -- optimize me
|
||||||
|
/**
|
||||||
|
* @return the number of elements in this pileup
|
||||||
|
*/
|
||||||
public int size() {
|
public int size() {
|
||||||
return pileup.size();
|
return size;
|
||||||
|
//return pileup.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the location of this pileup
|
||||||
|
*/
|
||||||
public GenomeLoc getLocation() {
|
public GenomeLoc getLocation() {
|
||||||
return loc;
|
return loc;
|
||||||
}
|
}
|
||||||
|
|
||||||
public char getRef() {
|
/**
|
||||||
return ref;
|
* Get counts of A, C, G, T in order; returns an int[4] vector with counts according
|
||||||
}
|
* to BaseUtils.simpleBaseToBaseIndex for each base.
|
||||||
|
*
|
||||||
public List<SAMRecord> getReads() {
|
* @return
|
||||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
*/
|
||||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
|
||||||
return reads;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<Integer> getOffsets() {
|
|
||||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
|
||||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
|
||||||
return offsets;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte[] getBases() {
|
|
||||||
byte[] v = new byte[size()];
|
|
||||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte[] getSecondaryBases() {
|
|
||||||
byte[] v = new byte[size()];
|
|
||||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
|
|
||||||
public byte[] getQuals() {
|
|
||||||
byte[] v = new byte[size()];
|
|
||||||
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int[] getBaseCounts() {
|
public int[] getBaseCounts() {
|
||||||
int[] counts = new int[4];
|
|
||||||
for ( PileupElement pile : this ) {
|
|
||||||
// skip deletion sites
|
|
||||||
if ( ! pile.isDeletion() ) {
|
|
||||||
char base = Character.toUpperCase((char)(pile.getBase()));
|
|
||||||
if (BaseUtils.simpleBaseToBaseIndex(base) == -1)
|
|
||||||
continue;
|
|
||||||
counts[BaseUtils.simpleBaseToBaseIndex(base)]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return counts;
|
return counts;
|
||||||
|
// int[] counts = new int[4];
|
||||||
|
// for ( PileupElement pile : this ) {
|
||||||
|
// // skip deletion sites
|
||||||
|
// if ( ! pile.isDeletion() ) {
|
||||||
|
// int index = BaseUtils.simpleBaseToBaseIndex((char)pile.getBase());
|
||||||
|
// if (index == -1)
|
||||||
|
// continue;
|
||||||
|
// counts[index]++;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return counts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
public boolean hasSecondaryBases() {
|
public boolean hasSecondaryBases() {
|
||||||
for ( PileupElement pile : this ) {
|
for ( PileupElement pile : this ) {
|
||||||
// skip deletion sites
|
// skip deletion sites
|
||||||
|
|
@ -169,14 +273,69 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getPileupString(boolean qualsAsInts) {
|
public String getPileupString(char ref, boolean qualsAsInts) {
|
||||||
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
|
// In the pileup format, each line represents a genomic position, consisting of chromosome name,
|
||||||
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
|
// coordinate, reference base, read bases, read qualities and alignment mapping qualities.
|
||||||
|
|
||||||
//return String.format("%s %s %s %s", getLocation(), getRef(), getBases(), getQuals());
|
|
||||||
return String.format("%s %s %s %s",
|
return String.format("%s %s %s %s",
|
||||||
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
|
||||||
getRef(), // reference base
|
ref, // reference base
|
||||||
new String(getBases()));
|
new String(getBases()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// --------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Convenience functions that may be slow
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates a fresh list each time
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public List<SAMRecord> getReads() {
|
||||||
|
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||||
|
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates a fresh list each time
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public List<Integer> getOffsets() {
|
||||||
|
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||||
|
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||||
|
return offsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates a fresh array each time
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public byte[] getBases() {
|
||||||
|
byte[] v = new byte[size()];
|
||||||
|
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getBase(); }
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates a fresh array each time
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public byte[] getSecondaryBases() {
|
||||||
|
byte[] v = new byte[size()];
|
||||||
|
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getSecondBase(); }
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates a fresh array each time
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public byte[] getQuals() {
|
||||||
|
byte[] v = new byte[size()];
|
||||||
|
for ( ExtendedPileupElement pile : this.extendedForeachIterator() ) { v[pile.getPileupOffset()] = pile.getQual(); }
|
||||||
|
return v;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue