Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
fd37da13af
|
|
@ -333,10 +333,6 @@ public class RefMetaDataTracker {
|
||||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
|
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
|
||||||
}
|
}
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public <T extends Feature> List<T> getValues(final Class<T> type, final Collection<String> names, final GenomeLoc onlyAtThisLoc) {
|
|
||||||
return addValues(names, type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
|
||||||
}
|
|
||||||
@Deprecated
|
|
||||||
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
|
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
|
||||||
return safeGetFirst(getValues(type, name));
|
return safeGetFirst(getValues(type, name));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
|
||||||
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
|
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
import org.yaml.snakeyaml.events.SequenceStartEvent;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -54,7 +55,7 @@ import java.util.regex.Pattern;
|
||||||
* with poor quality scores, that match particular sequences, or that were generated by particular machine cycles.
|
* with poor quality scores, that match particular sequences, or that were generated by particular machine cycles.
|
||||||
*/
|
*/
|
||||||
@Requires({DataSource.READS})
|
@Requires({DataSource.READS})
|
||||||
public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.ClippingData> {
|
public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {
|
||||||
@Output
|
@Output
|
||||||
PrintStream out;
|
PrintStream out;
|
||||||
|
|
||||||
|
|
@ -180,12 +181,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
* @param read the read itself, as a SAMRecord
|
* @param read the read itself, as a SAMRecord
|
||||||
* @return the ReadClipper object describing what should be done to clip this read
|
* @return the ReadClipper object describing what should be done to clip this read
|
||||||
*/
|
*/
|
||||||
public ReadClipper map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||||
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
||||||
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
|
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
|
||||||
read = ReadUtils.replaceSoftClipsWithMatches(read);
|
read = ReadUtils.replaceSoftClipsWithMatches(read);
|
||||||
}
|
}
|
||||||
ReadClipper clipper = new ReadClipper(read);
|
ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
|
||||||
|
|
||||||
//
|
//
|
||||||
// run all three clipping modules
|
// run all three clipping modules
|
||||||
|
|
@ -205,9 +206,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
*
|
*
|
||||||
* @param clipper
|
* @param clipper
|
||||||
*/
|
*/
|
||||||
private void clipSequences(ReadClipper clipper) {
|
private void clipSequences(ReadClipperWithData clipper) {
|
||||||
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
|
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
|
||||||
SAMRecord read = clipper.getRead();
|
SAMRecord read = clipper.getRead();
|
||||||
|
ClippingData data = clipper.getData();
|
||||||
|
|
||||||
for (SeqToClip stc : sequencesToClip) {
|
for (SeqToClip stc : sequencesToClip) {
|
||||||
// we have a pattern for both the forward and the reverse strands
|
// we have a pattern for both the forward and the reverse strands
|
||||||
|
|
@ -223,11 +225,14 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
if (found) {
|
if (found) {
|
||||||
int start = match.start();
|
int start = match.start();
|
||||||
int stop = match.end() - 1;
|
int stop = match.end() - 1;
|
||||||
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
|
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
|
||||||
|
ClippingOp op = new ClippingOp(start, stop);
|
||||||
clipper.addOp(op);
|
clipper.addOp(op);
|
||||||
|
data.incSeqClippedBases(stc.seq, op.getLength());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
clipper.setData(data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -252,9 +257,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
*
|
*
|
||||||
* @param clipper
|
* @param clipper
|
||||||
*/
|
*/
|
||||||
private void clipCycles(ReadClipper clipper) {
|
private void clipCycles(ReadClipperWithData clipper) {
|
||||||
if (cyclesToClip != null) {
|
if (cyclesToClip != null) {
|
||||||
SAMRecord read = clipper.getRead();
|
SAMRecord read = clipper.getRead();
|
||||||
|
ClippingData data = clipper.getData();
|
||||||
|
|
||||||
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
|
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
|
||||||
int cycleStart = p.first;
|
int cycleStart = p.first;
|
||||||
|
|
@ -270,10 +276,13 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
int start = startStop.first;
|
int start = startStop.first;
|
||||||
int stop = startStop.second;
|
int stop = startStop.second;
|
||||||
|
|
||||||
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
|
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
|
||||||
|
ClippingOp op = new ClippingOp(start, stop);
|
||||||
clipper.addOp(op);
|
clipper.addOp(op);
|
||||||
|
data.incNRangeClippedBases(op.getLength());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
clipper.setData(data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -291,8 +300,9 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
*
|
*
|
||||||
* @param clipper
|
* @param clipper
|
||||||
*/
|
*/
|
||||||
private void clipBadQualityScores(ReadClipper clipper) {
|
private void clipBadQualityScores(ReadClipperWithData clipper) {
|
||||||
SAMRecord read = clipper.getRead();
|
SAMRecord read = clipper.getRead();
|
||||||
|
ClippingData data = clipper.getData();
|
||||||
int readLen = read.getReadBases().length;
|
int readLen = read.getReadBases().length;
|
||||||
byte[] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
|
|
||||||
|
|
@ -311,8 +321,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
if (clipPoint != -1) {
|
if (clipPoint != -1) {
|
||||||
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
|
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
|
||||||
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
|
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
|
||||||
clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
|
//clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
|
||||||
|
ClippingOp op = new ClippingOp(start, stop);
|
||||||
|
clipper.addOp(op);
|
||||||
|
data.incNQClippedBases(op.getLength());
|
||||||
}
|
}
|
||||||
|
clipper.setData(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -325,7 +339,7 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
return new ClippingData(sequencesToClip);
|
return new ClippingData(sequencesToClip);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ClippingData reduce(ReadClipper clipper, ClippingData data) {
|
public ClippingData reduce(ReadClipperWithData clipper, ClippingData data) {
|
||||||
if ( clipper == null )
|
if ( clipper == null )
|
||||||
return data;
|
return data;
|
||||||
|
|
||||||
|
|
@ -340,23 +354,8 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
data.nTotalBases += clipper.getRead().getReadLength();
|
data.nTotalBases += clipper.getRead().getReadLength();
|
||||||
if (clipper.wasClipped()) {
|
if (clipper.wasClipped()) {
|
||||||
data.nClippedReads++;
|
data.nClippedReads++;
|
||||||
for (ClippingOp op : clipper.getOps()) {
|
data.addData(clipper.getData());
|
||||||
switch (op.type) {
|
|
||||||
case LOW_Q_SCORES:
|
|
||||||
data.incNQClippedBases(op.getLength());
|
|
||||||
break;
|
|
||||||
case WITHIN_CLIP_RANGE:
|
|
||||||
data.incNRangeClippedBases(op.getLength());
|
|
||||||
break;
|
|
||||||
case MATCHES_CLIP_SEQ:
|
|
||||||
data.incSeqClippedBases((String) op.extraInfo, op.getLength());
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new IllegalStateException("Unexpected Clipping operator type " + op);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -417,6 +416,23 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
|
seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addData (ClippingData data) {
|
||||||
|
nTotalReads += data.nTotalReads;
|
||||||
|
nTotalBases += data.nTotalBases;
|
||||||
|
nClippedReads += data.nClippedReads;
|
||||||
|
nClippedBases += data.nClippedBases;
|
||||||
|
nQClippedBases += data.nQClippedBases;
|
||||||
|
nRangeClippedBases += data.nRangeClippedBases;
|
||||||
|
nSeqClippedBases += data.nSeqClippedBases;
|
||||||
|
|
||||||
|
for (String seqClip : data.seqClipCounts.keySet()) {
|
||||||
|
Long count = data.seqClipCounts.get(seqClip);
|
||||||
|
if (seqClipCounts.containsKey(seqClip))
|
||||||
|
count += seqClipCounts.get(seqClip);
|
||||||
|
seqClipCounts.put(seqClip, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder s = new StringBuilder();
|
StringBuilder s = new StringBuilder();
|
||||||
|
|
||||||
|
|
@ -439,4 +455,27 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
return s.toString();
|
return s.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public class ReadClipperWithData extends ReadClipper {
|
||||||
|
private ClippingData data;
|
||||||
|
|
||||||
|
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
|
||||||
|
super(read);
|
||||||
|
data = new ClippingData(clipSeqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ClippingData getData() {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setData(ClippingData data) {
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addData(ClippingData data) {
|
||||||
|
this.data.addData(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -49,7 +50,7 @@ import java.util.List;
|
||||||
public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
||||||
|
|
||||||
@Input(fullName = "variant", shortName = "V", doc="variants to model", required=false)
|
@Input(fullName = "variant", shortName = "V", doc="variants to model", required=false)
|
||||||
public List<RodBinding<VariantContext>> variants;
|
public List<RodBinding<VariantContext>> variants = Collections.emptyList();
|
||||||
|
|
||||||
@Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
|
@Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
|
||||||
public RodBinding<VariantContext> snpmask;
|
public RodBinding<VariantContext> snpmask;
|
||||||
|
|
@ -66,17 +67,18 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
||||||
String refBase = String.valueOf((char)ref.getBase());
|
String refBase = String.valueOf((char)ref.getBase());
|
||||||
|
|
||||||
// Check to see if we have a called snp
|
// Check to see if we have a called snp
|
||||||
for ( VariantContext vc : tracker.getValues(VariantContext.class) ) {
|
for ( VariantContext vc : tracker.getValues(variants) ) {
|
||||||
if ( ! vc.getSource().equals(snpmask.getName())) {
|
if ( vc.isFiltered() )
|
||||||
if ( vc.isDeletion()) {
|
continue;
|
||||||
deletionBasesRemaining = vc.getReference().length();
|
|
||||||
// delete the next n bases, not this one
|
if ( vc.isDeletion()) {
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
deletionBasesRemaining = vc.getReference().length();
|
||||||
} else if ( vc.isInsertion()) {
|
// delete the next n bases, not this one
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
|
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||||
} else if (vc.isSNP()) {
|
} else if ( vc.isInsertion()) {
|
||||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString()));
|
||||||
}
|
} else if (vc.isSNP()) {
|
||||||
|
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,16 +2,18 @@ package org.broadinstitute.sting.gatk.walkers.qc;
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -23,6 +25,9 @@ public class CountIntervals extends RefWalker<Long, Long> {
|
||||||
@Output
|
@Output
|
||||||
PrintStream out;
|
PrintStream out;
|
||||||
|
|
||||||
|
@Input(fullName="check", shortName = "check", doc="Any number of RODs", required=false)
|
||||||
|
public List<RodBinding<Feature>> features = Collections.emptyList();
|
||||||
|
|
||||||
@Argument(fullName="numOverlaps",shortName="no",doc="Count all occurrences of X or more overlapping intervals; defaults to 2", required=false)
|
@Argument(fullName="numOverlaps",shortName="no",doc="Count all occurrences of X or more overlapping intervals; defaults to 2", required=false)
|
||||||
int numOverlaps = 2;
|
int numOverlaps = 2;
|
||||||
|
|
||||||
|
|
@ -37,7 +42,7 @@ public class CountIntervals extends RefWalker<Long, Long> {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Feature> checkIntervals = tracker.getValues(Feature.class, "check");
|
List<Feature> checkIntervals = tracker.getValues(features);
|
||||||
return (long) checkIntervals.size();
|
return (long) checkIntervals.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
|
|
@ -16,22 +18,14 @@ import java.util.Vector;
|
||||||
* according to the wishes of the supplid ClippingAlgorithm enum.
|
* according to the wishes of the supplid ClippingAlgorithm enum.
|
||||||
*/
|
*/
|
||||||
public class ClippingOp {
|
public class ClippingOp {
|
||||||
public final ClippingType type;
|
|
||||||
public final int start, stop; // inclusive
|
public final int start, stop; // inclusive
|
||||||
public final Object extraInfo;
|
|
||||||
|
|
||||||
public ClippingOp(int start, int stop) {
|
public ClippingOp(int start, int stop) {
|
||||||
this(null, start, stop, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public ClippingOp(ClippingType type, int start, int stop, Object extraInfo) {
|
|
||||||
// todo -- remove type and extra info
|
|
||||||
this.type = type;
|
|
||||||
this.start = start;
|
this.start = start;
|
||||||
this.stop = stop;
|
this.stop = stop;
|
||||||
this.extraInfo = extraInfo;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public int getLength() {
|
public int getLength() {
|
||||||
return stop - start + 1;
|
return stop - start + 1;
|
||||||
}
|
}
|
||||||
|
|
@ -72,48 +66,45 @@ public class ClippingOp {
|
||||||
break;
|
break;
|
||||||
case HARDCLIP_BASES:
|
case HARDCLIP_BASES:
|
||||||
case SOFTCLIP_BASES:
|
case SOFTCLIP_BASES:
|
||||||
if ( ! clippedRead.getReadUnmappedFlag() ) {
|
if ( clippedRead.getReadUnmappedFlag() ) {
|
||||||
// we can't process unmapped reads
|
// we can't process unmapped reads
|
||||||
|
throw new UserException("Read Clipper cannot soft/hard clip unmapped reads");
|
||||||
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
|
|
||||||
int myStop = stop;
|
|
||||||
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
|
|
||||||
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
|
||||||
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
|
|
||||||
//break;
|
|
||||||
myStop--; // just decrement stop
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
|
|
||||||
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
|
|
||||||
clippedRead.getReadName(), start, myStop));
|
|
||||||
|
|
||||||
Cigar oldCigar = clippedRead.getCigar();
|
|
||||||
|
|
||||||
int scLeft = 0, scRight = clippedRead.getReadLength();
|
|
||||||
if ( start == 0 )
|
|
||||||
scLeft = myStop + 1;
|
|
||||||
else
|
|
||||||
scRight = start;
|
|
||||||
|
|
||||||
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
|
|
||||||
clippedRead.setCigar(newCigar);
|
|
||||||
|
|
||||||
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
|
|
||||||
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
|
|
||||||
clippedRead.setAlignmentStart(newStart);
|
|
||||||
|
|
||||||
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
|
|
||||||
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
|
|
||||||
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
|
|
||||||
} else if ( algorithm == ClippingRepresentation.HARDCLIP_BASES ) {
|
|
||||||
// we can hard clip unmapped reads
|
|
||||||
if ( clippedRead.getReadNegativeStrandFlag() )
|
|
||||||
clippedRead = ReadUtils.hardClipBases(clippedRead, 0, start, null);
|
|
||||||
else
|
|
||||||
clippedRead = ReadUtils.hardClipBases(clippedRead, start, start + getLength(), null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//System.out.printf("%d %d %d%n", stop, start, clippedRead.getReadLength());
|
||||||
|
int myStop = stop;
|
||||||
|
if ( (stop + 1 - start) == clippedRead.getReadLength() ) {
|
||||||
|
// BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
|
||||||
|
//Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", clippedRead.getReadName()));
|
||||||
|
//break;
|
||||||
|
myStop--; // just decrement stop
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( start > 0 && myStop != clippedRead.getReadLength() - 1 )
|
||||||
|
throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d",
|
||||||
|
clippedRead.getReadName(), start, myStop));
|
||||||
|
|
||||||
|
Cigar oldCigar = clippedRead.getCigar();
|
||||||
|
|
||||||
|
int scLeft = 0, scRight = clippedRead.getReadLength();
|
||||||
|
if ( start == 0 )
|
||||||
|
scLeft = myStop + 1;
|
||||||
|
else
|
||||||
|
scRight = start;
|
||||||
|
|
||||||
|
Cigar newCigar = softClip(oldCigar, scLeft, scRight);
|
||||||
|
clippedRead.setCigar(newCigar);
|
||||||
|
|
||||||
|
int newClippedStart = getNewAlignmentStartOffset(newCigar, oldCigar);
|
||||||
|
int newStart = clippedRead.getAlignmentStart() + newClippedStart;
|
||||||
|
clippedRead.setAlignmentStart(newStart);
|
||||||
|
|
||||||
|
if ( algorithm == ClippingRepresentation.HARDCLIP_BASES )
|
||||||
|
clippedRead = ReadUtils.hardClipSoftClippedBases(clippedRead);
|
||||||
|
//System.out.printf("%s clipping at %d %d / %d %d => %s and %d%n", oldCigar.toString(), start, stop, scLeft, scRight, newCigar.toString(), newStart);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
throw new IllegalStateException("Unexpected Clipping operator type " + algorithm);
|
||||||
}
|
}
|
||||||
|
|
@ -121,15 +112,6 @@ public class ClippingOp {
|
||||||
return clippedRead;
|
return clippedRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* What is the type of a ClippingOp?
|
|
||||||
*/
|
|
||||||
public enum ClippingType {
|
|
||||||
LOW_Q_SCORES,
|
|
||||||
WITHIN_CLIP_RANGE,
|
|
||||||
MATCHES_CLIP_SEQ
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given a cigar string, get the number of bases hard or soft clipped at the start
|
* Given a cigar string, get the number of bases hard or soft clipped at the start
|
||||||
*/
|
*/
|
||||||
|
|
@ -198,7 +180,7 @@ public class ClippingOp {
|
||||||
Vector<CigarElement> newElements = new Vector<CigarElement>();
|
Vector<CigarElement> newElements = new Vector<CigarElement>();
|
||||||
for (CigarElement curElem : __cigar.getCigarElements()) {
|
for (CigarElement curElem : __cigar.getCigarElements()) {
|
||||||
if (!curElem.getOperator().consumesReadBases()) {
|
if (!curElem.getOperator().consumesReadBases()) {
|
||||||
if (curLength > __startClipEnd && curLength < __endClipBegin) {
|
if (curElem.getOperator() == CigarOperator.HARD_CLIP || curLength > __startClipEnd && curLength < __endClipBegin) {
|
||||||
newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
|
newElements.add(new CigarElement(curElem.getLength(), curElem.getOperator()));
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,10 @@
|
||||||
package org.broadinstitute.sting.utils.clipreads;
|
package org.broadinstitute.sting.utils.clipreads;
|
||||||
|
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
import org.jets3t.service.multi.ThreadedStorageService;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -43,6 +47,23 @@ public class ReadClipper {
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
|
int start = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||||
|
int stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||||
|
this.addOp(new ClippingOp(start, stop));
|
||||||
|
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||||
|
this.addOp(new ClippingOp(start, stop));
|
||||||
|
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
||||||
|
this.read = hardClipByReferenceCoordinates(read.getUnclippedStart(), left);
|
||||||
|
this.ops = null; // reset the operations
|
||||||
|
return hardClipByReferenceCoordinates(right, read.getUnclippedEnd());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
|
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
|
@ -112,7 +113,42 @@ public class ReadUtils {
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public enum OverlapType { NOT_OVERLAPPING, IN_ADAPTOR }
|
public enum OverlapType { NOT_OVERLAPPING, IN_ADAPTOR}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This enum represents all the different ways in which a read can overlap an interval.
|
||||||
|
*
|
||||||
|
* NO_OVERLAP:
|
||||||
|
* the read does not overlap the interval.
|
||||||
|
*
|
||||||
|
* |----------------| (interval)
|
||||||
|
* <----------------> (read)
|
||||||
|
*
|
||||||
|
* LEFT_OVERLAP:
|
||||||
|
* the read starts before the beginning of the interval but ends inside of it
|
||||||
|
*
|
||||||
|
* |----------------| (interval)
|
||||||
|
* <----------------> (read)
|
||||||
|
*
|
||||||
|
* RIGHT_OVERLAP:
|
||||||
|
* the read starts inside the interval but ends outside of it
|
||||||
|
*
|
||||||
|
* |----------------| (interval)
|
||||||
|
* <----------------> (read)
|
||||||
|
*
|
||||||
|
* FULL_OVERLAP:
|
||||||
|
* the read starts before the interval and ends after the interval
|
||||||
|
*
|
||||||
|
* |-----------| (interval)
|
||||||
|
* <-------------------> (read)
|
||||||
|
*
|
||||||
|
* CONTAINED:
|
||||||
|
* the read starts and ends inside the interval
|
||||||
|
*
|
||||||
|
* |----------------| (interval)
|
||||||
|
* <--------> (read)
|
||||||
|
*/
|
||||||
|
public enum ReadAndIntervalOverlap {NO_OVERLAP, LEFT_OVERLAP, RIGHT_OVERLAP, FULL_OVERLAP, CONTAINED}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* God, there's a huge information asymmetry in SAM format:
|
* God, there's a huge information asymmetry in SAM format:
|
||||||
|
|
@ -396,16 +432,34 @@ public class ReadUtils {
|
||||||
keepEnd = rec.getReadLength() - l - 1;
|
keepEnd = rec.getReadLength() - l - 1;
|
||||||
newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP));
|
newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP));
|
||||||
break;
|
break;
|
||||||
case H:
|
|
||||||
// TODO -- must be handled specially
|
|
||||||
throw new ReviewedStingException("BUG: tell mark he forgot to implement this");
|
|
||||||
default:
|
default:
|
||||||
newCigarElements.add(ce);
|
newCigarElements.add(ce);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return hardClipBases(rec, keepStart, keepEnd, newCigarElements);
|
// Merges tandem cigar elements like 5H10H or 2S5S to 15H or 7S
|
||||||
|
// this will happen if you soft clip a read that has been hard clipped before
|
||||||
|
// like: 5H20S => 5H20H
|
||||||
|
List<CigarElement> mergedCigarElements = new LinkedList<CigarElement>();
|
||||||
|
Iterator<CigarElement> cigarElementIterator = newCigarElements.iterator();
|
||||||
|
CigarOperator currentOperator = null;
|
||||||
|
int currentOperatorLength = 0;
|
||||||
|
while (cigarElementIterator.hasNext()) {
|
||||||
|
CigarElement cigarElement = cigarElementIterator.next();
|
||||||
|
if (currentOperator != cigarElement.getOperator()) {
|
||||||
|
if (currentOperator != null)
|
||||||
|
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
|
||||||
|
currentOperator = cigarElement.getOperator();
|
||||||
|
currentOperatorLength = cigarElement.getLength();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
currentOperatorLength += cigarElement.getLength();
|
||||||
|
}
|
||||||
|
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
|
||||||
|
|
||||||
|
return hardClipBases(rec, keepStart, keepEnd, mergedCigarElements);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -424,8 +478,7 @@ public class ReadUtils {
|
||||||
"keepEnd < rec.getReadLength()",
|
"keepEnd < rec.getReadLength()",
|
||||||
"rec.getReadUnmappedFlag() || newCigarElements != null"})
|
"rec.getReadUnmappedFlag() || newCigarElements != null"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public static SAMRecord hardClipBases(SAMRecord rec, int keepStart, int keepEnd,
|
public static SAMRecord hardClipBases(SAMRecord rec, int keepStart, int keepEnd, List<CigarElement> newCigarElements) {
|
||||||
List<CigarElement> newCigarElements) {
|
|
||||||
int newLength = keepEnd - keepStart + 1;
|
int newLength = keepEnd - keepStart + 1;
|
||||||
if ( newLength != rec.getReadLength() ) {
|
if ( newLength != rec.getReadLength() ) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -569,7 +622,71 @@ public class ReadUtils {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines what is the position of the read in relation to the interval.
|
||||||
|
* Note: This function uses the UNCLIPPED ENDS of the reads for the comparison.
|
||||||
|
* @param read the read
|
||||||
|
* @param interval the interval
|
||||||
|
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
|
||||||
|
*/
|
||||||
|
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
||||||
|
if ( (!read.getReferenceName().equals(interval.getContig())) ||
|
||||||
|
(read.getUnclippedEnd() < interval.getStart()) ||
|
||||||
|
(read.getUnclippedStart() > interval.getStop()) )
|
||||||
|
return ReadAndIntervalOverlap.NO_OVERLAP;
|
||||||
|
|
||||||
|
else if ( (read.getUnclippedStart() >= interval.getStart()) &&
|
||||||
|
(read.getUnclippedEnd() <= interval.getStop()) )
|
||||||
|
return ReadAndIntervalOverlap.CONTAINED;
|
||||||
|
|
||||||
|
else if ( (read.getUnclippedStart() < interval.getStart()) &&
|
||||||
|
(read.getUnclippedEnd() > interval.getStop()) )
|
||||||
|
return ReadAndIntervalOverlap.FULL_OVERLAP;
|
||||||
|
|
||||||
|
else if ( (read.getAlignmentStart() < interval.getStart()) )
|
||||||
|
return ReadAndIntervalOverlap.LEFT_OVERLAP;
|
||||||
|
|
||||||
|
else
|
||||||
|
return ReadAndIntervalOverlap.RIGHT_OVERLAP;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||||
|
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||||
|
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||||
|
int readBases = 0;
|
||||||
|
int refBases = 0;
|
||||||
|
int goal = refCoord - read.getUnclippedStart(); // read coords are 0-based!
|
||||||
|
boolean goalReached = false;
|
||||||
|
|
||||||
|
Iterator<CigarElement> cigarElementIterator = read.getCigar().getCigarElements().iterator();
|
||||||
|
while (!goalReached && cigarElementIterator.hasNext()) {
|
||||||
|
CigarElement cigarElement = cigarElementIterator.next();
|
||||||
|
int shift = 0;
|
||||||
|
if (refBases == 0 && readBases == 0 && cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
|
||||||
|
goal -= cigarElement.getLength();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||||
|
if (refBases + cigarElement.getLength() < goal) {
|
||||||
|
shift = cigarElement.getLength();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
shift = goal - refBases;
|
||||||
|
}
|
||||||
|
refBases += shift;
|
||||||
|
}
|
||||||
|
goalReached = refBases == goal;
|
||||||
|
|
||||||
|
if (cigarElement.getOperator().consumesReadBases()) {
|
||||||
|
readBases += goalReached ? shift : cigarElement.getLength();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!goalReached)
|
||||||
|
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||||
|
|
||||||
|
return readBases;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue