GATKSAMRecord refactor
The GATK engine will now provide a GATKSAMRecord to all tools which incorporates the functionality used by the GATK to the bam file (ReadGroups, Reduced Reads, ...). * No tools should create SAMRecord anymore, use GATKSAMRecord instead *
This commit is contained in:
parent
f1df6c0c81
commit
e89ff063fc
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
|
|
@ -35,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -72,12 +72,13 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
*
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of reads aligned by this map (aka 1).
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
//logger.info(String.format("examining read %s", read.getReadName()));
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
|
@ -92,12 +93,13 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
*
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of alignments found for this read.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
SAMRecord alignedRead = aligner.align(read,header);
|
||||
out.addAlignment(alignedRead);
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
|
|
@ -34,6 +33,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Iterator;
|
||||
|
|
@ -79,12 +79,13 @@ public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
*
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of alignments found for this read.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
|
||||
if(alignmentIterator.hasNext()) {
|
||||
int numAlignments = alignmentIterator.next().length;
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ public class AlignmentContext implements HasGenomeLocation {
|
|||
*/
|
||||
@Deprecated
|
||||
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory
|
||||
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); }
|
||||
public List<GATKSAMRecord> getReads() { return ( basePileup.getReads() ); }
|
||||
|
||||
/**
|
||||
* Are there any reads associated with this locus?
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
|
@ -132,7 +132,7 @@ public class AllLocusView extends LocusView {
|
|||
* @param site Site at which to create the blank locus context.
|
||||
* @return empty context.
|
||||
*/
|
||||
private final static List<SAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
|
||||
private final static List<GATKSAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
|
||||
private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList();
|
||||
private AlignmentContext createEmptyLocus( GenomeLoc site ) {
|
||||
return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -377,10 +378,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
|
||||
}
|
||||
else nInsertions++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadEventStartOffset(),
|
||||
state.getEventLength(),
|
||||
state.getEventBases()) );
|
||||
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) );
|
||||
|
||||
} else {
|
||||
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
|
||||
|
|
@ -402,9 +400,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
|
||||
// about to report) only if includeReadsWithDeletionAtLoci is true.
|
||||
size++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadOffset()-1,
|
||||
-1) // length=-1 --> noevent
|
||||
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -442,12 +438,12 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
continue;
|
||||
} else {
|
||||
//observed_bases++;
|
||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()));
|
||||
size++;
|
||||
}
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
size++;
|
||||
pile.add(new PileupElement(state.getRead(), -1));
|
||||
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1));
|
||||
nDeletions++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
|||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -57,9 +58,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
return "dups";
|
||||
}
|
||||
|
||||
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
|
||||
private List<GATKSAMRecord> readsAtLoc(final GATKSAMRecord read, PushbackIterator<SAMRecord> iter) {
|
||||
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
|
||||
ArrayList<GATKSAMRecord> l = new ArrayList<GATKSAMRecord>();
|
||||
|
||||
l.add(read);
|
||||
for (SAMRecord read2 : iter) {
|
||||
|
|
@ -70,7 +71,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
iter.pushback(read2);
|
||||
break;
|
||||
} else {
|
||||
l.add(read2);
|
||||
l.add((GATKSAMRecord) read2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -84,15 +85,15 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
* @param reads the list of reads to split into unique molecular samples
|
||||
* @return
|
||||
*/
|
||||
protected Set<List<SAMRecord>> uniqueReadSets(List<SAMRecord> reads) {
|
||||
Set<List<SAMRecord>> readSets = new LinkedHashSet<List<SAMRecord>>();
|
||||
protected Set<List<GATKSAMRecord>> uniqueReadSets(List<GATKSAMRecord> reads) {
|
||||
Set<List<GATKSAMRecord>> readSets = new LinkedHashSet<List<GATKSAMRecord>>();
|
||||
|
||||
// for each read, find duplicates, and either add the read to its duplicate list or start a new one
|
||||
for ( SAMRecord read : reads ) {
|
||||
List<SAMRecord> readSet = findDuplicateReads(read, readSets);
|
||||
for ( GATKSAMRecord read : reads ) {
|
||||
List<GATKSAMRecord> readSet = findDuplicateReads(read, readSets);
|
||||
|
||||
if ( readSet == null ) {
|
||||
readSets.add(new ArrayList<SAMRecord>(Arrays.asList(read))); // copy so I can add to the list
|
||||
readSets.add(new ArrayList<GATKSAMRecord>(Arrays.asList(read))); // copy so I can add to the list
|
||||
} else {
|
||||
readSet.add(read);
|
||||
}
|
||||
|
|
@ -110,13 +111,13 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
* @param readSets
|
||||
* @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind
|
||||
*/
|
||||
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) {
|
||||
protected List<GATKSAMRecord> findDuplicateReads(GATKSAMRecord read, Set<List<GATKSAMRecord>> readSets ) {
|
||||
if ( read.getReadPairedFlag() ) {
|
||||
// paired
|
||||
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
|
||||
|
||||
for (List<SAMRecord> reads : readSets) {
|
||||
SAMRecord key = reads.get(0);
|
||||
for (List<GATKSAMRecord> reads : readSets) {
|
||||
GATKSAMRecord key = reads.get(0);
|
||||
|
||||
// read and key start at the same place, and either the this read and the key
|
||||
// share a mate location or the read is flagged as a duplicate
|
||||
|
|
@ -131,8 +132,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (List<SAMRecord> reads : readSets) {
|
||||
SAMRecord key = reads.get(0);
|
||||
for (List<GATKSAMRecord> reads : readSets) {
|
||||
GATKSAMRecord key = reads.get(0);
|
||||
boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength();
|
||||
//System.out.printf("%s %s %b %b %d %d %d %d => %b%n",
|
||||
// read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(),
|
||||
|
|
@ -179,7 +180,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
// get the genome loc from the read
|
||||
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||
|
||||
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter));
|
||||
Set<List<GATKSAMRecord>> readSets = uniqueReadSets(readsAtLoc((GATKSAMRecord) read, iter));
|
||||
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -100,9 +101,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
// if the read is mapped, create a metadata tracker
|
||||
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
|
||||
|
||||
final boolean keepMeP = walker.filter(refContext, read);
|
||||
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(refContext, read, tracker); // the tracker can be null
|
||||
M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.commandline.Advanced;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingOp;
|
|||
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -292,11 +292,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @param read the read itself, as a GATKSAMRecord
|
||||
* @return the ReadClipper object describing what should be done to clip this read
|
||||
*/
|
||||
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
||||
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
|
||||
read = ReadUtils.replaceSoftClipsWithMatches(read);
|
||||
|
|
@ -323,7 +324,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
*/
|
||||
private void clipSequences(ReadClipperWithData clipper) {
|
||||
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
|
||||
SAMRecord read = clipper.getRead();
|
||||
GATKSAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
|
||||
for (SeqToClip stc : sequencesToClip) {
|
||||
|
|
@ -360,7 +361,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
* @param stop
|
||||
* @return
|
||||
*/
|
||||
private Pair<Integer, Integer> strandAwarePositions(SAMRecord read, int start, int stop) {
|
||||
private Pair<Integer, Integer> strandAwarePositions(GATKSAMRecord read, int start, int stop) {
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1);
|
||||
else
|
||||
|
|
@ -374,7 +375,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
*/
|
||||
private void clipCycles(ReadClipperWithData clipper) {
|
||||
if (cyclesToClip != null) {
|
||||
SAMRecord read = clipper.getRead();
|
||||
GATKSAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
|
||||
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
|
||||
|
|
@ -416,7 +417,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
* @param clipper
|
||||
*/
|
||||
private void clipBadQualityScores(ReadClipperWithData clipper) {
|
||||
SAMRecord read = clipper.getRead();
|
||||
GATKSAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
int readLen = read.getReadBases().length;
|
||||
byte[] quals = read.getBaseQualities();
|
||||
|
|
@ -458,7 +459,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
if ( clipper == null )
|
||||
return data;
|
||||
|
||||
SAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
|
||||
GATKSAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
|
||||
if (outputBam != null) {
|
||||
outputBam.addAlignment(clippedRead);
|
||||
} else {
|
||||
|
|
@ -575,7 +576,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
public class ReadClipperWithData extends ReadClipper {
|
||||
private ClippingData data;
|
||||
|
||||
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
|
||||
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
|
||||
super(read);
|
||||
data = new ClippingData(clipSeqs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
|
@ -20,11 +20,11 @@ import java.util.Set;
|
|||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
|
||||
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
|
||||
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets ) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets );
|
||||
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets );
|
||||
|
||||
// Given result of map function
|
||||
public abstract ReduceType reduceInit();
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -71,21 +72,23 @@ public class FindReadsWithNamesWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* The reads filter function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
return namesToKeep.contains(read.getReadName());
|
||||
}
|
||||
|
||||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.text.DecimalFormat;
|
||||
|
|
@ -119,7 +119,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
|
|||
|
||||
private FlagStat myStat = new FlagStat();
|
||||
|
||||
public Integer map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
myStat.readCount++;
|
||||
if (read.getReadFailsVendorQualityCheckFlag()) {
|
||||
myStat.QC_failure++;
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import java.util.TreeSet;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
|
||||
|
|
@ -136,11 +137,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
|
||||
/**
|
||||
* The reads filter function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
// check the read group
|
||||
if ( readGroup != null ) {
|
||||
SAMReadGroupRecord myReadGroup = read.getReadGroup();
|
||||
|
|
@ -180,11 +182,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
|
||||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -20,11 +21,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
|
|||
/** Must return true for reads that need to be processed. Reads, for which this method return false will
|
||||
* be skipped by the engine and never passed to the walker.
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
// We are keeping all the reads
|
||||
return true;
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
public abstract MapType map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker);
|
||||
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -60,7 +61,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
|
|||
logger.info("SplitSamFile version: " + VERSION);
|
||||
}
|
||||
|
||||
public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
|
@ -69,12 +69,12 @@ public class ReadLengthDistribution extends ReadWalker<Integer, Integer> {
|
|||
|
||||
}
|
||||
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
return ( !read.getReadPairedFlag() || read.getReadPairedFlag() && read.getFirstOfPairFlag());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext referenceContext, SAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) {
|
||||
public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) {
|
||||
GATKReportTable table = report.getTable("ReadLengthDistribution");
|
||||
|
||||
int length = Math.abs(samRecord.getReadLength());
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -180,8 +181,8 @@ public class ConstrainedMateFixingManager {
|
|||
addRead(newRead, readWasModified, true);
|
||||
}
|
||||
|
||||
public void addReads(List<SAMRecord> newReads, Set<SAMRecord> modifiedReads) {
|
||||
for ( SAMRecord newRead : newReads )
|
||||
public void addReads(List<GATKSAMRecord> newReads, Set<GATKSAMRecord> modifiedReads) {
|
||||
for ( GATKSAMRecord newRead : newReads )
|
||||
addRead(newRead, modifiedReads.contains(newRead), false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
|
@ -281,10 +282,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
// the reads and known indels that fall into the current interval
|
||||
private final ReadBin readsToClean = new ReadBin();
|
||||
private final ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>();
|
||||
private final ArrayList<GATKSAMRecord> readsNotToClean = new ArrayList<GATKSAMRecord>();
|
||||
private final ArrayList<VariantContext> knownIndelsToTry = new ArrayList<VariantContext>();
|
||||
private final HashSet<Object> indelRodsSeen = new HashSet<Object>();
|
||||
private final HashSet<SAMRecord> readsActuallyCleaned = new HashSet<SAMRecord>();
|
||||
private final HashSet<GATKSAMRecord> readsActuallyCleaned = new HashSet<GATKSAMRecord>();
|
||||
|
||||
private static final int MAX_QUAL = 99;
|
||||
|
||||
|
|
@ -469,7 +470,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
readsActuallyCleaned.clear();
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( currentInterval == null ) {
|
||||
emit(read);
|
||||
return 0;
|
||||
|
|
@ -535,7 +536,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// TODO -- it would be nice if we could use indels from 454 reads as alternate consenses
|
||||
}
|
||||
|
||||
private void cleanAndCallMap(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
|
||||
private void cleanAndCallMap(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
|
||||
if ( readsToClean.size() > 0 ) {
|
||||
GenomeLoc earliestPossibleMove = getToolkit().getGenomeLocParser().createGenomeLoc(readsToClean.getReads().get(0));
|
||||
if ( manager.canMoveReads(earliestPossibleMove) )
|
||||
|
|
@ -656,14 +657,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
private void clean(ReadBin readsToClean) {
|
||||
|
||||
final List<SAMRecord> reads = readsToClean.getReads();
|
||||
final List<GATKSAMRecord> reads = readsToClean.getReads();
|
||||
if ( reads.size() == 0 )
|
||||
return;
|
||||
|
||||
byte[] reference = readsToClean.getReference(referenceReader);
|
||||
int leftmostIndex = readsToClean.getLocation().getStart();
|
||||
|
||||
final ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>(); // reads that perfectly match ref
|
||||
final ArrayList<GATKSAMRecord> refReads = new ArrayList<GATKSAMRecord>(); // reads that perfectly match ref
|
||||
final ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
|
||||
final LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
|
||||
final Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
|
||||
|
|
@ -815,7 +816,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// however we don't have enough info to use the proper MAQ scoring system.
|
||||
// For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010].
|
||||
// TODO -- we need a better solution here
|
||||
SAMRecord read = aRead.getRead();
|
||||
GATKSAMRecord read = aRead.getRead();
|
||||
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
|
||||
|
||||
// before we fix the attribute tags we first need to make sure we have enough of the reference sequence
|
||||
|
|
@ -874,8 +875,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
private long determineReadsThatNeedCleaning(final List<SAMRecord> reads,
|
||||
final ArrayList<SAMRecord> refReadsToPopulate,
|
||||
private long determineReadsThatNeedCleaning(final List<GATKSAMRecord> reads,
|
||||
final ArrayList<GATKSAMRecord> refReadsToPopulate,
|
||||
final ArrayList<AlignedRead> altReadsToPopulate,
|
||||
final LinkedList<AlignedRead> altAlignmentsToTest,
|
||||
final Set<Consensus> altConsenses,
|
||||
|
|
@ -884,7 +885,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
long totalRawMismatchSum = 0L;
|
||||
|
||||
for ( final SAMRecord read : reads ) {
|
||||
for ( final GATKSAMRecord read : reads ) {
|
||||
|
||||
// we can not deal with screwy records
|
||||
if ( read.getCigar().numCigarElements() == 0 ) {
|
||||
|
|
@ -1372,7 +1373,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
private class AlignedRead {
|
||||
private final SAMRecord read;
|
||||
private final GATKSAMRecord read;
|
||||
private byte[] readBases = null;
|
||||
private byte[] baseQuals = null;
|
||||
private Cigar newCigar = null;
|
||||
|
|
@ -1380,12 +1381,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private int mismatchScoreToReference = 0;
|
||||
private long alignerMismatchScore = 0;
|
||||
|
||||
public AlignedRead(SAMRecord read) {
|
||||
public AlignedRead(GATKSAMRecord read) {
|
||||
this.read = read;
|
||||
mismatchScoreToReference = 0;
|
||||
}
|
||||
|
||||
public SAMRecord getRead() {
|
||||
public GATKSAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
@ -1569,7 +1570,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
private class ReadBin implements HasGenomeLocation {
|
||||
|
||||
private final ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||
private byte[] reference = null;
|
||||
private GenomeLoc loc = null;
|
||||
|
||||
|
|
@ -1577,7 +1578,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
|
||||
// Return false if we can't process this read bin because the reads are not correctly overlapping.
|
||||
// This can happen if e.g. there's a large known indel with no overlapping reads.
|
||||
public void add(SAMRecord read) {
|
||||
public void add(GATKSAMRecord read) {
|
||||
|
||||
GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read);
|
||||
if ( loc == null )
|
||||
|
|
@ -1588,7 +1589,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
reads.add(read);
|
||||
}
|
||||
|
||||
public List<SAMRecord> getReads() { return reads; }
|
||||
public List<GATKSAMRecord> getReads() { return reads; }
|
||||
|
||||
public byte[] getReference(IndexedFastaSequenceFile referenceReader) {
|
||||
// set up the reference if we haven't done so yet
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -88,7 +89,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
|
|||
writer.addAlignment(read);
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
// we can not deal with screwy records
|
||||
if ( read.getCigar().numCigarElements() == 0 ) {
|
||||
emit(read);
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -394,7 +395,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
|
||||
// if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT READ");
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Gender;
|
||||
|
|
@ -32,6 +31,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
|
||||
|
|
@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
|
|||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
Sample sample = getSampleDB().getSample(read);
|
||||
return sample.getGender() == Gender.MALE ? 1 : 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
|
||||
|
|
@ -38,7 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
|
|||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class CountReadsWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,434 +1,434 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.collections.PrimitivePair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Apr 9, 2010
|
||||
* Time: 12:16:41 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
|
||||
* Can also count the number of reads matching a given criterion using read filters (see the
|
||||
* --read-filter command line argument). Simplest example of a read-backed analysis.
|
||||
*/
|
||||
@Requires({DataSource.READS})
|
||||
public class CycleQualityWalker extends ReadWalker<Integer,Integer> {
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
|
||||
"on mapped reads only, while unmapped reads will be discarded", required=false)
|
||||
protected boolean MAPPED_ONLY = true;
|
||||
@Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false)
|
||||
protected int MAX_READ_LENGTH = 500;
|
||||
@Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true)
|
||||
protected String PREFIX = null;
|
||||
// @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false)
|
||||
protected boolean HTML = false;
|
||||
@Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false)
|
||||
protected double QTHRESHOLD = 10.0;
|
||||
@Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+
|
||||
"original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.")
|
||||
protected boolean ASSESS_BOTH_QUALS = false;
|
||||
|
||||
private Map<String,CycleStats[]> cyclesByLaneMap = null;
|
||||
private Map<String,CycleStats[]> cyclesByLibraryMap = null;
|
||||
private Map<String,CycleStats[]> cyclesByLaneMapOrig = null;
|
||||
private Map<String,CycleStats[]> cyclesByLibraryMapOrig = null;
|
||||
|
||||
public void initialize() {
|
||||
if ( PREFIX == null ) throw new ReviewedStingException("Prefix for output file(s) must be specified");
|
||||
cyclesByLaneMap = new HashMap<String,CycleStats[]>();
|
||||
cyclesByLibraryMap = new HashMap<String,CycleStats[]>();
|
||||
cyclesByLaneMapOrig = new HashMap<String,CycleStats[]>();
|
||||
cyclesByLibraryMapOrig = new HashMap<String,CycleStats[]>();
|
||||
}
|
||||
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
|
||||
if ( AlignmentUtils.isReadUnmapped(read) ) return 0;
|
||||
|
||||
SAMReadGroupRecord rg = read.getReadGroup();
|
||||
|
||||
if ( rg == null ) throw new UserException.ReadMissingReadGroup(read);
|
||||
|
||||
String lane = read.getReadGroup().getPlatformUnit();
|
||||
String library = read.getReadGroup().getLibrary();
|
||||
|
||||
if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information");
|
||||
if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information");
|
||||
|
||||
int end = 0;
|
||||
|
||||
if ( read.getReadPairedFlag() ) {
|
||||
|
||||
if ( read.getFirstOfPairFlag() ) {
|
||||
if ( read.getSecondOfPairFlag() )
|
||||
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
|
||||
end = 1;
|
||||
} else {
|
||||
if ( ! read.getSecondOfPairFlag() )
|
||||
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
|
||||
end = 2;
|
||||
}
|
||||
}
|
||||
|
||||
CycleStats[] byLane = cyclesByLaneMap.get(lane);
|
||||
CycleStats[] byLib = cyclesByLibraryMap.get(library);
|
||||
|
||||
//byte [] quals = USE_ORIGINAL_QUALS ? AlignmentUtils.getOriginalQualsInCycleOrder(read) : AlignmentUtils.getQualsInCycleOrder(read);
|
||||
|
||||
byte [] quals = AlignmentUtils.getQualsInCycleOrder(read);
|
||||
|
||||
// if end == 0 (single end lane), we allocate array of length 1, otherwise we need two
|
||||
// elements in the array in order to be able to collect statistics for each end in the pair independently
|
||||
if ( byLane == null ) cyclesByLaneMap.put(lane,byLane = new CycleStats[(end==0?1:2)]);
|
||||
if ( byLib == null ) cyclesByLibraryMap.put(library, byLib =new CycleStats[2]);
|
||||
|
||||
if ( end != 0 ) end--; // we will now use 'end' as index into the array of stats
|
||||
|
||||
if ( byLane[end] == null ) byLane[end] = new CycleStats(MAX_READ_LENGTH);
|
||||
if ( byLib[end] == null ) byLib[end] =new CycleStats(MAX_READ_LENGTH);
|
||||
byLane[end].add(quals);
|
||||
byLib[end].add(quals);
|
||||
|
||||
return 1; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum.intValue()+value.intValue(); //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
if ( HTML ) {
|
||||
out.println("<h3>Cycle Quality QC</h3>\n");
|
||||
out.println("File(s) analyzed: <br>");
|
||||
for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"<br>");
|
||||
out.println("<br>");
|
||||
}
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
out.println("\n"+result+" reads analyzed\n");
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
out.println("by platform unit:");
|
||||
if ( HTML ) out.println("<br>");
|
||||
report2(cyclesByLaneMap, new File(PREFIX+".byLane.txt"),true);
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
out.println("\nby library:");
|
||||
if ( HTML ) out.println("<br>");
|
||||
report2(cyclesByLibraryMap, new File(PREFIX+".byLibrary.txt"),true);
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void report2(Map<String,CycleStats[]> m, File f,boolean summaryReport) {
|
||||
long totalReads_1 =0;
|
||||
long totalReads_2 =0;
|
||||
long totalReads_unpaired = 0;
|
||||
SortedSet<String> columns = new TreeSet<String>();
|
||||
int maxLength = 0; // maximum read length across all lanes/read ends analyzed
|
||||
|
||||
for( Map.Entry<String,CycleStats[]> e : m.entrySet() ) {
|
||||
if ( e.getValue()[0].getMaxReadLength() > maxLength ) maxLength = e.getValue()[0].getMaxReadLength();
|
||||
|
||||
if ( e.getValue().length == 1 || e.getValue().length == 2 && e.getValue()[1] == null ) {
|
||||
totalReads_unpaired += e.getValue()[0].getReadCount(); // single end lane
|
||||
} else {
|
||||
totalReads_1 += e.getValue()[0].getReadCount();
|
||||
totalReads_2 += e.getValue()[1].getReadCount();
|
||||
if ( e.getValue()[1].getMaxReadLength() > maxLength ) maxLength = e.getValue()[1].getMaxReadLength();
|
||||
}
|
||||
|
||||
columns.add(e.getKey());
|
||||
}
|
||||
|
||||
if ( summaryReport ) {
|
||||
if ( totalReads_1 == 0 && totalReads_2 != 0) {
|
||||
out.println(" End 1: No reads");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
if ( totalReads_2 == 0 && totalReads_1 != 0 ) {
|
||||
out.println(" End 2: No reads");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) {
|
||||
out.println(" No reads found.");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
}
|
||||
|
||||
if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) return;
|
||||
|
||||
try {
|
||||
BufferedWriter w = new BufferedWriter(new FileWriter(f));
|
||||
|
||||
w.write("cycle");
|
||||
|
||||
for( String col : columns ) {
|
||||
CycleStats[] data = m.get(col);
|
||||
if ( summaryReport ) {
|
||||
out.print(" ");
|
||||
out.print(col);
|
||||
}
|
||||
|
||||
CycleStats end1 = data[0];
|
||||
int minL = ( end1 == null ? 0 : end1.getMinReadLength() );
|
||||
int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() );
|
||||
|
||||
if ( data.length == 2 && data[1] != null ) {
|
||||
if ( summaryReport ) {
|
||||
out.println(": paired");
|
||||
if ( HTML ) out.println("<br>");
|
||||
out.println(" Reads analyzed:");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
CycleStats end2 = data[1];
|
||||
|
||||
out.print( " End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) );
|
||||
if ( minL == maxL ) out.println("; read length = "+minL);
|
||||
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
|
||||
if ( HTML ) out.println("<br>");
|
||||
|
||||
out.print( " End 2: "+ ( end2 == null ? 0 : end2.getReadCount()) );
|
||||
minL = ( end2 == null ? 0 : end2.getMinReadLength() );
|
||||
maxL = ( end2 == null ? 0 : end2.getMaxReadLength() );
|
||||
if ( minL == maxL ) out.println("; read length = "+minL);
|
||||
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
else {
|
||||
out.println(": unpaired");
|
||||
if ( HTML ) out.println("<br>");
|
||||
out.print( " Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) );
|
||||
if ( minL == maxL ) out.println("; read length = "+minL);
|
||||
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
|
||||
w.write('\t') ;
|
||||
w.write(col);
|
||||
if ( data.length == 1 || data.length == 2 && data[1] == null ) {
|
||||
w.write(".unpaired");
|
||||
w.write('\t');
|
||||
w.write(col);
|
||||
w.write(".unpaired.stddev");
|
||||
} else {
|
||||
w.write(".end1");
|
||||
w.write('\t');
|
||||
w.write(col);
|
||||
w.write(".end1.stddev");
|
||||
w.write('\t') ;
|
||||
w.write(col);
|
||||
w.write(".end2");
|
||||
w.write('\t');
|
||||
w.write(col);
|
||||
w.write(".end2.stddev");
|
||||
}
|
||||
}
|
||||
|
||||
w.write('\n');
|
||||
|
||||
int cycle = 0;
|
||||
|
||||
Map<String,List<PrimitivePair.Int>> problems = new HashMap<String,List<PrimitivePair.Int>>();
|
||||
|
||||
while ( cycle < maxLength ) {
|
||||
w.write(Integer.toString(cycle+1));
|
||||
for ( String col : columns ) {
|
||||
|
||||
CycleStats[] data = m.get(col);
|
||||
CycleStats end1 = data[0];
|
||||
w.write('\t');
|
||||
if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t.");
|
||||
else {
|
||||
double aq = end1.getCycleQualAverage(cycle);
|
||||
w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle)));
|
||||
recordProblem(aq,cycle, problems,col+".End1");
|
||||
}
|
||||
if ( data.length > 1 && data[1] != null ) {
|
||||
w.write('\t');
|
||||
CycleStats end2 = data[1];
|
||||
if ( end2 == null || cycle >= end2.getMaxReadLength() ) w.write(".\t.");
|
||||
else {
|
||||
double aq = end2.getCycleQualAverage(cycle);
|
||||
w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle)));
|
||||
recordProblem(aq,cycle, problems,col+".End2");
|
||||
}
|
||||
}
|
||||
}
|
||||
w.write('\n');
|
||||
cycle++;
|
||||
}
|
||||
w.close();
|
||||
|
||||
if ( HTML ) out.println("<hr>");
|
||||
|
||||
if ( HTML ) out.println("<br>");
|
||||
out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):");
|
||||
if ( HTML ) out.println("<br>");
|
||||
for ( String col : columns ) {
|
||||
List<PrimitivePair.Int> lp = problems.get(col+".End1");
|
||||
out.print(" "+col+" End1:");
|
||||
if ( lp == null ) {
|
||||
out.print(" GOOD");
|
||||
} else {
|
||||
for ( PrimitivePair.Int p : lp ) {
|
||||
out.print(" "+(p.first+1)+"-");
|
||||
if ( p.second >= 0 ) out.print((p.second+1));
|
||||
else out.print("END");
|
||||
}
|
||||
}
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br>");
|
||||
|
||||
lp = problems.get(col+".End2");
|
||||
out.print(" "+col+" End2:");
|
||||
if ( lp == null ) {
|
||||
out.print(" GOOD");
|
||||
} else {
|
||||
for ( PrimitivePair.Int p : lp ) {
|
||||
out.print(" "+(p.first+1)+"-");
|
||||
if ( p.second >= 0 ) out.print(p.second);
|
||||
else out.print("END");
|
||||
}
|
||||
}
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
|
||||
} catch (IOException ioe) {
|
||||
throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void recordProblem(double q, int cycle, Map<String,List<PrimitivePair.Int>> problems, String name) {
|
||||
|
||||
PrimitivePair.Int p = null;
|
||||
List<PrimitivePair.Int> lp = null;
|
||||
if ( q < QTHRESHOLD ) { // there is a problem
|
||||
if ( ! problems.containsKey(name) ) {
|
||||
lp = new ArrayList<PrimitivePair.Int>();
|
||||
p = new PrimitivePair.Int(cycle,-1);
|
||||
lp.add(p);
|
||||
problems.put(name,lp);
|
||||
} else {
|
||||
lp = problems.get(name);
|
||||
p = lp.get(lp.size()-1);
|
||||
}
|
||||
if ( p.second != -1 ) { // if we are not already inside a run of bad qual bases
|
||||
lp.add(new PrimitivePair.Int(cycle,-1)); // start new run
|
||||
}
|
||||
} else { // good base
|
||||
if ( problems.containsKey(name) ) { // only if we had problem intervals at all, we need to check if the last one needs to be closed
|
||||
lp = problems.get(name);
|
||||
p = lp.get(lp.size()-1);
|
||||
if ( p.second == -1 ) p.second = cycle - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static class CycleStats {
|
||||
private long readCount = 0;
|
||||
private double[] cycleQualsAv = null;
|
||||
private double[] cycleQualsSd = null;
|
||||
private int minL = 1000000000; // read min. length
|
||||
private int maxL = 0; // read max. length
|
||||
|
||||
public CycleStats(int N) {
|
||||
readCount = 0;
|
||||
cycleQualsAv = new double[N];
|
||||
cycleQualsSd = new double[N];
|
||||
}
|
||||
|
||||
public void add(byte[] quals) {
|
||||
if ( quals.length > cycleQualsAv.length )
|
||||
throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length");
|
||||
if ( quals.length > maxL ) maxL = quals.length;
|
||||
if ( quals.length < minL ) minL = quals.length;
|
||||
readCount++;
|
||||
for ( int i = 0 ; i < quals.length ; i++ ) {
|
||||
// NOTE: in the update equaltions below, there is no need to check if readCount == 1 (i.e.
|
||||
// we are initializing with the very first record) or not. Indeed, the arrays are initialized with
|
||||
// 0; when the very first value arrives, readCount is 1 and cycleQuals[i] gets set to quals[i] (correct!);
|
||||
// this will also make the second term in the update equation for Sd (quals[i]-cycleQualsAv[i]) equal
|
||||
// to 0, so Sd will be initially set to 0.
|
||||
double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance
|
||||
cycleQualsAv[i] += ( quals[i] - cycleQualsAv[i] ) / readCount; // update mean
|
||||
cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] );
|
||||
}
|
||||
}
|
||||
|
||||
public long getReadCount() { return readCount; }
|
||||
public int getMaxReadLength() { return maxL; }
|
||||
public int getMinReadLength() { return minL; }
|
||||
// long [] getCycleQualSums() { return cycleQuals; }
|
||||
// long getCycleQualSum(int i) { return cycleQuals[i]; }
|
||||
double getCycleQualAverage(int i) { return cycleQualsAv[i]; }
|
||||
double getCycleQualStdDev(int i) { return Math.sqrt( cycleQualsSd[i]/(readCount-1) ); }
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.collections.PrimitivePair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Apr 9, 2010
|
||||
* Time: 12:16:41 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
|
||||
* Can also count the number of reads matching a given criterion using read filters (see the
|
||||
* --read-filter command line argument). Simplest example of a read-backed analysis.
|
||||
*/
|
||||
@Requires({DataSource.READS})
|
||||
public class CycleQualityWalker extends ReadWalker<Integer,Integer> {
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
|
||||
"on mapped reads only, while unmapped reads will be discarded", required=false)
|
||||
protected boolean MAPPED_ONLY = true;
|
||||
@Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false)
|
||||
protected int MAX_READ_LENGTH = 500;
|
||||
@Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true)
|
||||
protected String PREFIX = null;
|
||||
// @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false)
|
||||
protected boolean HTML = false;
|
||||
@Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false)
|
||||
protected double QTHRESHOLD = 10.0;
|
||||
@Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+
|
||||
"original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.")
|
||||
protected boolean ASSESS_BOTH_QUALS = false;
|
||||
|
||||
private Map<String,CycleStats[]> cyclesByLaneMap = null;
|
||||
private Map<String,CycleStats[]> cyclesByLibraryMap = null;
|
||||
private Map<String,CycleStats[]> cyclesByLaneMapOrig = null;
|
||||
private Map<String,CycleStats[]> cyclesByLibraryMapOrig = null;
|
||||
|
||||
public void initialize() {
|
||||
if ( PREFIX == null ) throw new ReviewedStingException("Prefix for output file(s) must be specified");
|
||||
cyclesByLaneMap = new HashMap<String,CycleStats[]>();
|
||||
cyclesByLibraryMap = new HashMap<String,CycleStats[]>();
|
||||
cyclesByLaneMapOrig = new HashMap<String,CycleStats[]>();
|
||||
cyclesByLibraryMapOrig = new HashMap<String,CycleStats[]>();
|
||||
}
|
||||
|
||||
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
|
||||
if ( AlignmentUtils.isReadUnmapped(read) ) return 0;
|
||||
|
||||
SAMReadGroupRecord rg = read.getReadGroup();
|
||||
|
||||
if ( rg == null ) throw new UserException.ReadMissingReadGroup(read);
|
||||
|
||||
String lane = read.getReadGroup().getPlatformUnit();
|
||||
String library = read.getReadGroup().getLibrary();
|
||||
|
||||
if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information");
|
||||
if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information");
|
||||
|
||||
int end = 0;
|
||||
|
||||
if ( read.getReadPairedFlag() ) {
|
||||
|
||||
if ( read.getFirstOfPairFlag() ) {
|
||||
if ( read.getSecondOfPairFlag() )
|
||||
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
|
||||
end = 1;
|
||||
} else {
|
||||
if ( ! read.getSecondOfPairFlag() )
|
||||
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
|
||||
end = 2;
|
||||
}
|
||||
}
|
||||
|
||||
CycleStats[] byLane = cyclesByLaneMap.get(lane);
|
||||
CycleStats[] byLib = cyclesByLibraryMap.get(library);
|
||||
|
||||
//byte [] quals = USE_ORIGINAL_QUALS ? AlignmentUtils.getOriginalQualsInCycleOrder(read) : AlignmentUtils.getQualsInCycleOrder(read);
|
||||
|
||||
byte [] quals = AlignmentUtils.getQualsInCycleOrder(read);
|
||||
|
||||
// if end == 0 (single end lane), we allocate array of length 1, otherwise we need two
|
||||
// elements in the array in order to be able to collect statistics for each end in the pair independently
|
||||
if ( byLane == null ) cyclesByLaneMap.put(lane,byLane = new CycleStats[(end==0?1:2)]);
|
||||
if ( byLib == null ) cyclesByLibraryMap.put(library, byLib =new CycleStats[2]);
|
||||
|
||||
if ( end != 0 ) end--; // we will now use 'end' as index into the array of stats
|
||||
|
||||
if ( byLane[end] == null ) byLane[end] = new CycleStats(MAX_READ_LENGTH);
|
||||
if ( byLib[end] == null ) byLib[end] =new CycleStats(MAX_READ_LENGTH);
|
||||
byLane[end].add(quals);
|
||||
byLib[end].add(quals);
|
||||
|
||||
return 1; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum.intValue()+value.intValue(); //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
if ( HTML ) {
|
||||
out.println("<h3>Cycle Quality QC</h3>\n");
|
||||
out.println("File(s) analyzed: <br>");
|
||||
for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"<br>");
|
||||
out.println("<br>");
|
||||
}
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
out.println("\n"+result+" reads analyzed\n");
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
out.println("by platform unit:");
|
||||
if ( HTML ) out.println("<br>");
|
||||
report2(cyclesByLaneMap, new File(PREFIX+".byLane.txt"),true);
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
out.println("\nby library:");
|
||||
if ( HTML ) out.println("<br>");
|
||||
report2(cyclesByLibraryMap, new File(PREFIX+".byLibrary.txt"),true);
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br><br>");
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void report2(Map<String,CycleStats[]> m, File f,boolean summaryReport) {
|
||||
long totalReads_1 =0;
|
||||
long totalReads_2 =0;
|
||||
long totalReads_unpaired = 0;
|
||||
SortedSet<String> columns = new TreeSet<String>();
|
||||
int maxLength = 0; // maximum read length across all lanes/read ends analyzed
|
||||
|
||||
for( Map.Entry<String,CycleStats[]> e : m.entrySet() ) {
|
||||
if ( e.getValue()[0].getMaxReadLength() > maxLength ) maxLength = e.getValue()[0].getMaxReadLength();
|
||||
|
||||
if ( e.getValue().length == 1 || e.getValue().length == 2 && e.getValue()[1] == null ) {
|
||||
totalReads_unpaired += e.getValue()[0].getReadCount(); // single end lane
|
||||
} else {
|
||||
totalReads_1 += e.getValue()[0].getReadCount();
|
||||
totalReads_2 += e.getValue()[1].getReadCount();
|
||||
if ( e.getValue()[1].getMaxReadLength() > maxLength ) maxLength = e.getValue()[1].getMaxReadLength();
|
||||
}
|
||||
|
||||
columns.add(e.getKey());
|
||||
}
|
||||
|
||||
if ( summaryReport ) {
|
||||
if ( totalReads_1 == 0 && totalReads_2 != 0) {
|
||||
out.println(" End 1: No reads");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
if ( totalReads_2 == 0 && totalReads_1 != 0 ) {
|
||||
out.println(" End 2: No reads");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) {
|
||||
out.println(" No reads found.");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
}
|
||||
|
||||
if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) return;
|
||||
|
||||
try {
|
||||
BufferedWriter w = new BufferedWriter(new FileWriter(f));
|
||||
|
||||
w.write("cycle");
|
||||
|
||||
for( String col : columns ) {
|
||||
CycleStats[] data = m.get(col);
|
||||
if ( summaryReport ) {
|
||||
out.print(" ");
|
||||
out.print(col);
|
||||
}
|
||||
|
||||
CycleStats end1 = data[0];
|
||||
int minL = ( end1 == null ? 0 : end1.getMinReadLength() );
|
||||
int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() );
|
||||
|
||||
if ( data.length == 2 && data[1] != null ) {
|
||||
if ( summaryReport ) {
|
||||
out.println(": paired");
|
||||
if ( HTML ) out.println("<br>");
|
||||
out.println(" Reads analyzed:");
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
CycleStats end2 = data[1];
|
||||
|
||||
out.print( " End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) );
|
||||
if ( minL == maxL ) out.println("; read length = "+minL);
|
||||
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
|
||||
if ( HTML ) out.println("<br>");
|
||||
|
||||
out.print( " End 2: "+ ( end2 == null ? 0 : end2.getReadCount()) );
|
||||
minL = ( end2 == null ? 0 : end2.getMinReadLength() );
|
||||
maxL = ( end2 == null ? 0 : end2.getMaxReadLength() );
|
||||
if ( minL == maxL ) out.println("; read length = "+minL);
|
||||
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
else {
|
||||
out.println(": unpaired");
|
||||
if ( HTML ) out.println("<br>");
|
||||
out.print( " Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) );
|
||||
if ( minL == maxL ) out.println("; read length = "+minL);
|
||||
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
|
||||
w.write('\t') ;
|
||||
w.write(col);
|
||||
if ( data.length == 1 || data.length == 2 && data[1] == null ) {
|
||||
w.write(".unpaired");
|
||||
w.write('\t');
|
||||
w.write(col);
|
||||
w.write(".unpaired.stddev");
|
||||
} else {
|
||||
w.write(".end1");
|
||||
w.write('\t');
|
||||
w.write(col);
|
||||
w.write(".end1.stddev");
|
||||
w.write('\t') ;
|
||||
w.write(col);
|
||||
w.write(".end2");
|
||||
w.write('\t');
|
||||
w.write(col);
|
||||
w.write(".end2.stddev");
|
||||
}
|
||||
}
|
||||
|
||||
w.write('\n');
|
||||
|
||||
int cycle = 0;
|
||||
|
||||
Map<String,List<PrimitivePair.Int>> problems = new HashMap<String,List<PrimitivePair.Int>>();
|
||||
|
||||
while ( cycle < maxLength ) {
|
||||
w.write(Integer.toString(cycle+1));
|
||||
for ( String col : columns ) {
|
||||
|
||||
CycleStats[] data = m.get(col);
|
||||
CycleStats end1 = data[0];
|
||||
w.write('\t');
|
||||
if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t.");
|
||||
else {
|
||||
double aq = end1.getCycleQualAverage(cycle);
|
||||
w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle)));
|
||||
recordProblem(aq,cycle, problems,col+".End1");
|
||||
}
|
||||
if ( data.length > 1 && data[1] != null ) {
|
||||
w.write('\t');
|
||||
CycleStats end2 = data[1];
|
||||
if ( end2 == null || cycle >= end2.getMaxReadLength() ) w.write(".\t.");
|
||||
else {
|
||||
double aq = end2.getCycleQualAverage(cycle);
|
||||
w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle)));
|
||||
recordProblem(aq,cycle, problems,col+".End2");
|
||||
}
|
||||
}
|
||||
}
|
||||
w.write('\n');
|
||||
cycle++;
|
||||
}
|
||||
w.close();
|
||||
|
||||
if ( HTML ) out.println("<hr>");
|
||||
|
||||
if ( HTML ) out.println("<br>");
|
||||
out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):");
|
||||
if ( HTML ) out.println("<br>");
|
||||
for ( String col : columns ) {
|
||||
List<PrimitivePair.Int> lp = problems.get(col+".End1");
|
||||
out.print(" "+col+" End1:");
|
||||
if ( lp == null ) {
|
||||
out.print(" GOOD");
|
||||
} else {
|
||||
for ( PrimitivePair.Int p : lp ) {
|
||||
out.print(" "+(p.first+1)+"-");
|
||||
if ( p.second >= 0 ) out.print((p.second+1));
|
||||
else out.print("END");
|
||||
}
|
||||
}
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br>");
|
||||
|
||||
lp = problems.get(col+".End2");
|
||||
out.print(" "+col+" End2:");
|
||||
if ( lp == null ) {
|
||||
out.print(" GOOD");
|
||||
} else {
|
||||
for ( PrimitivePair.Int p : lp ) {
|
||||
out.print(" "+(p.first+1)+"-");
|
||||
if ( p.second >= 0 ) out.print(p.second);
|
||||
else out.print("END");
|
||||
}
|
||||
}
|
||||
out.println();
|
||||
if ( HTML ) out.println("<br>");
|
||||
}
|
||||
|
||||
} catch (IOException ioe) {
|
||||
throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void recordProblem(double q, int cycle, Map<String,List<PrimitivePair.Int>> problems, String name) {
|
||||
|
||||
PrimitivePair.Int p = null;
|
||||
List<PrimitivePair.Int> lp = null;
|
||||
if ( q < QTHRESHOLD ) { // there is a problem
|
||||
if ( ! problems.containsKey(name) ) {
|
||||
lp = new ArrayList<PrimitivePair.Int>();
|
||||
p = new PrimitivePair.Int(cycle,-1);
|
||||
lp.add(p);
|
||||
problems.put(name,lp);
|
||||
} else {
|
||||
lp = problems.get(name);
|
||||
p = lp.get(lp.size()-1);
|
||||
}
|
||||
if ( p.second != -1 ) { // if we are not already inside a run of bad qual bases
|
||||
lp.add(new PrimitivePair.Int(cycle,-1)); // start new run
|
||||
}
|
||||
} else { // good base
|
||||
if ( problems.containsKey(name) ) { // only if we had problem intervals at all, we need to check if the last one needs to be closed
|
||||
lp = problems.get(name);
|
||||
p = lp.get(lp.size()-1);
|
||||
if ( p.second == -1 ) p.second = cycle - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static class CycleStats {
|
||||
private long readCount = 0;
|
||||
private double[] cycleQualsAv = null;
|
||||
private double[] cycleQualsSd = null;
|
||||
private int minL = 1000000000; // read min. length
|
||||
private int maxL = 0; // read max. length
|
||||
|
||||
public CycleStats(int N) {
|
||||
readCount = 0;
|
||||
cycleQualsAv = new double[N];
|
||||
cycleQualsSd = new double[N];
|
||||
}
|
||||
|
||||
public void add(byte[] quals) {
|
||||
if ( quals.length > cycleQualsAv.length )
|
||||
throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length");
|
||||
if ( quals.length > maxL ) maxL = quals.length;
|
||||
if ( quals.length < minL ) minL = quals.length;
|
||||
readCount++;
|
||||
for ( int i = 0 ; i < quals.length ; i++ ) {
|
||||
// NOTE: in the update equaltions below, there is no need to check if readCount == 1 (i.e.
|
||||
// we are initializing with the very first record) or not. Indeed, the arrays are initialized with
|
||||
// 0; when the very first value arrives, readCount is 1 and cycleQuals[i] gets set to quals[i] (correct!);
|
||||
// this will also make the second term in the update equation for Sd (quals[i]-cycleQualsAv[i]) equal
|
||||
// to 0, so Sd will be initially set to 0.
|
||||
double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance
|
||||
cycleQualsAv[i] += ( quals[i] - cycleQualsAv[i] ) / readCount; // update mean
|
||||
cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] );
|
||||
}
|
||||
}
|
||||
|
||||
public long getReadCount() { return readCount; }
|
||||
public int getMaxReadLength() { return maxL; }
|
||||
public int getMinReadLength() { return minL; }
|
||||
// long [] getCycleQualSums() { return cycleQuals; }
|
||||
// long getCycleQualSum(int i) { return cycleQuals[i]; }
|
||||
double getCycleQualAverage(int i) { return cycleQualsAv[i]; }
|
||||
double getCycleQualStdDev(int i) { return Math.sqrt( cycleQualsSd[i]/(readCount-1) ); }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -40,7 +40,7 @@ public class PrintLocusContextWalker extends LocusWalker<AlignmentContext, Integ
|
|||
return lhs + rhs;
|
||||
}
|
||||
|
||||
private String[] getReadNames( List<SAMRecord> reads ) {
|
||||
private String[] getReadNames( List<GATKSAMRecord> reads ) {
|
||||
String[] readNames = new String[ reads.size() ];
|
||||
for( int i = 0; i < reads.size(); i++ ) {
|
||||
readNames[i] = String.format("%nname = %s, start = %d, end = %d", reads.get(i).getReadName(), reads.get(i).getAlignmentStart(), reads.get(i).getAlignmentEnd());
|
||||
|
|
|
|||
|
|
@ -1,142 +1,142 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* * OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* User: depristo
|
||||
* Date: May 5, 2010
|
||||
* Time: 12:16:41 PM
|
||||
*/
|
||||
|
||||
/**
|
||||
* Walks over the input reads, printing out statistics about the read length, number of clipping events, and length
|
||||
* of the clipping to the output stream.
|
||||
*/
|
||||
@Requires({DataSource.READS})
|
||||
public class ReadClippingStatsWalker extends ReadWalker<ReadClippingStatsWalker.ReadClippingInfo,Integer> {
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
|
||||
"on mapped reads only, while unmapped reads will be discarded", required=false)
|
||||
protected boolean MAPPED_ONLY = true;
|
||||
|
||||
@Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false)
|
||||
protected int SKIP = 1;
|
||||
|
||||
// public void initialize() {
|
||||
//
|
||||
// }
|
||||
|
||||
public class ReadClippingInfo {
|
||||
SAMReadGroupRecord rg;
|
||||
int readLength, nClippingEvents, nClippedBases;
|
||||
}
|
||||
|
||||
public ReadClippingInfo map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY)
|
||||
return null;
|
||||
|
||||
ReadClippingInfo info = new ReadClippingInfo();
|
||||
info.rg = read.getReadGroup();
|
||||
|
||||
if ( info.rg == null ) throw new UserException.ReadMissingReadGroup(read);
|
||||
|
||||
for ( CigarElement elt : read.getCigar().getCigarElements() ) {
|
||||
if ( elt.getOperator() != CigarOperator.N )
|
||||
|
||||
switch ( elt.getOperator()) {
|
||||
case H : // ignore hard clips
|
||||
case S : // soft clip
|
||||
info.nClippingEvents++;
|
||||
info.nClippedBases += elt.getLength();
|
||||
// note the fall through here
|
||||
case M :
|
||||
case D : // deletion w.r.t. the reference
|
||||
case P : // ignore pads
|
||||
case I : // insertion w.r.t. the reference
|
||||
info.readLength += elt.getLength(); // Unless we have a reference skip, the read gets longer
|
||||
break;
|
||||
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
break;
|
||||
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + elt.getOperator());
|
||||
}
|
||||
}
|
||||
|
||||
return info; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
out.println(Utils.join(" \t", Arrays.asList("ReadGroup", "ReadLength", "NClippingEvents", "NClippedBases", "PercentClipped")));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param info result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(ReadClippingInfo info, Integer sum) {
|
||||
if ( info != null ) {
|
||||
if ( sum % SKIP == 0 ) {
|
||||
String id = info.rg.getReadGroupId();
|
||||
out.printf("%s\t %d\t %d\t %d\t %.2f%n",
|
||||
id, info.readLength, info.nClippingEvents, info.nClippedBases,
|
||||
100.0 * MathUtils.ratio(info.nClippedBases, info.readLength));
|
||||
}
|
||||
return sum + 1; //To change body of implemented methods use File | Settings | File Templates.
|
||||
} else {
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
|
||||
}
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* * OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* User: depristo
|
||||
* Date: May 5, 2010
|
||||
* Time: 12:16:41 PM
|
||||
*/
|
||||
|
||||
/**
|
||||
* Walks over the input reads, printing out statistics about the read length, number of clipping events, and length
|
||||
* of the clipping to the output stream.
|
||||
*/
|
||||
@Requires({DataSource.READS})
|
||||
public class ReadClippingStatsWalker extends ReadWalker<ReadClippingStatsWalker.ReadClippingInfo,Integer> {
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
@Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
|
||||
"on mapped reads only, while unmapped reads will be discarded", required=false)
|
||||
protected boolean MAPPED_ONLY = true;
|
||||
|
||||
@Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false)
|
||||
protected int SKIP = 1;
|
||||
|
||||
// public void initialize() {
|
||||
//
|
||||
// }
|
||||
|
||||
public class ReadClippingInfo {
|
||||
SAMReadGroupRecord rg;
|
||||
int readLength, nClippingEvents, nClippedBases;
|
||||
}
|
||||
|
||||
public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY)
|
||||
return null;
|
||||
|
||||
ReadClippingInfo info = new ReadClippingInfo();
|
||||
info.rg = read.getReadGroup();
|
||||
|
||||
if ( info.rg == null ) throw new UserException.ReadMissingReadGroup(read);
|
||||
|
||||
for ( CigarElement elt : read.getCigar().getCigarElements() ) {
|
||||
if ( elt.getOperator() != CigarOperator.N )
|
||||
|
||||
switch ( elt.getOperator()) {
|
||||
case H : // ignore hard clips
|
||||
case S : // soft clip
|
||||
info.nClippingEvents++;
|
||||
info.nClippedBases += elt.getLength();
|
||||
// note the fall through here
|
||||
case M :
|
||||
case D : // deletion w.r.t. the reference
|
||||
case P : // ignore pads
|
||||
case I : // insertion w.r.t. the reference
|
||||
info.readLength += elt.getLength(); // Unless we have a reference skip, the read gets longer
|
||||
break;
|
||||
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
break;
|
||||
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + elt.getOperator());
|
||||
}
|
||||
}
|
||||
|
||||
return info; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
public Integer reduceInit() {
|
||||
out.println(Utils.join(" \t", Arrays.asList("ReadGroup", "ReadLength", "NClippingEvents", "NClippedBases", "PercentClipped")));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param info result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
public Integer reduce(ReadClippingInfo info, Integer sum) {
|
||||
if ( info != null ) {
|
||||
if ( sum % SKIP == 0 ) {
|
||||
String id = info.rg.getReadGroupId();
|
||||
out.printf("%s\t %d\t %d\t %d\t %.2f%n",
|
||||
id, info.readLength, info.nClippingEvents, info.nClippedBases,
|
||||
100.0 * MathUtils.ratio(info.nClippedBases, info.readLength));
|
||||
}
|
||||
return sum + 1; //To change body of implemented methods use File | Settings | File Templates.
|
||||
} else {
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
|
@ -64,21 +65,23 @@ public class ReadValidationWalker extends ReadWalker<SAMRecord, SAMRecord> {
|
|||
|
||||
/**
|
||||
* The reads filter function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -364,11 +364,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
|
||||
*
|
||||
* @param refBases References bases over the length of the read
|
||||
* @param read The read to be recalibrated
|
||||
* @return The read with quality scores replaced
|
||||
*/
|
||||
public SAMRecord map( ReferenceContext refBases, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public SAMRecord map( ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
|
||||
if( read.getReadLength() == 0 ) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
|
||||
return read;
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Stack;
|
||||
|
|
@ -39,14 +39,14 @@ public class ClippingOp {
|
|||
* @param algorithm
|
||||
* @param read
|
||||
*/
|
||||
public SAMRecord apply(ClippingRepresentation algorithm, SAMRecord read) {
|
||||
public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord read) {
|
||||
byte[] quals = read.getBaseQualities();
|
||||
byte[] bases = read.getReadBases();
|
||||
|
||||
switch (algorithm) {
|
||||
// important note:
|
||||
// it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
|
||||
// because you're not guaranteed to get a pointer to the actual array of bytes in the SAMRecord
|
||||
// because you're not guaranteed to get a pointer to the actual array of bytes in the GATKSAMRecord
|
||||
case WRITE_NS:
|
||||
for (int i = start; i <= stop; i++)
|
||||
bases[i] = 'N';
|
||||
|
|
@ -248,9 +248,9 @@ public class ClippingOp {
|
|||
}
|
||||
|
||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
|
||||
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
|
||||
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
|
||||
if (start == 0 && stop == read.getReadLength() - 1)
|
||||
return new SAMRecord(read.getHeader());
|
||||
return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||
CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop);
|
||||
|
|
@ -265,9 +265,9 @@ public class ClippingOp {
|
|||
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
|
||||
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
|
||||
|
||||
SAMRecord hardClippedRead;
|
||||
GATKSAMRecord hardClippedRead;
|
||||
try {
|
||||
hardClippedRead = (SAMRecord) read.clone();
|
||||
hardClippedRead = (GATKSAMRecord) read.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new ReviewedStingException("Where did the clone go?");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ package org.broadinstitute.sting.utils.clipreads;
|
|||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -14,7 +14,7 @@ import java.util.List;
|
|||
* A simple collection of the clipping operations to apply to a read along with its read
|
||||
*/
|
||||
public class ReadClipper {
|
||||
SAMRecord read;
|
||||
GATKSAMRecord read;
|
||||
boolean wasClipped;
|
||||
List<ClippingOp> ops = null;
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ public class ReadClipper {
|
|||
*
|
||||
* @param read
|
||||
*/
|
||||
public ReadClipper(final SAMRecord read) {
|
||||
public ReadClipper(final GATKSAMRecord read) {
|
||||
this.read = read;
|
||||
this.wasClipped = false;
|
||||
}
|
||||
|
|
@ -46,19 +46,19 @@ public class ReadClipper {
|
|||
return wasClipped;
|
||||
}
|
||||
|
||||
public SAMRecord getRead() {
|
||||
public GATKSAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
|
||||
public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
|
||||
return hardClipByReferenceCoordinates(-1, refStop);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
|
||||
public GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
|
||||
return hardClipByReferenceCoordinates(refStart, -1);
|
||||
}
|
||||
|
||||
private int numDeletions(SAMRecord read) {
|
||||
private int numDeletions(GATKSAMRecord read) {
|
||||
int result = 0;
|
||||
for (CigarElement e: read.getCigar().getCigarElements()) {
|
||||
if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D )
|
||||
|
|
@ -67,7 +67,7 @@ public class ReadClipper {
|
|||
return result;
|
||||
}
|
||||
|
||||
protected SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
|
||||
|
|
@ -78,32 +78,32 @@ public class ReadClipper {
|
|||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
||||
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
this.ops = null;
|
||||
return clippedRead;
|
||||
}
|
||||
|
||||
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
||||
@Requires("left <= right")
|
||||
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
||||
public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
||||
if (left == right)
|
||||
return new SAMRecord(read.getHeader());
|
||||
SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
|
||||
return new GATKSAMRecord(read.getHeader());
|
||||
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
|
||||
|
||||
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
|
||||
// make the left cut index no longer part of the read. In that case, clip the read entirely.
|
||||
if (left > leftTailRead.getAlignmentEnd())
|
||||
return new SAMRecord(read.getHeader());
|
||||
return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
ReadClipper clipper = new ReadClipper(leftTailRead);
|
||||
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipLowQualEnds(byte lowQual) {
|
||||
public GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
|
||||
byte [] quals = read.getBaseQualities();
|
||||
int leftClipIndex = 0;
|
||||
int rightClipIndex = read.getReadLength() - 1;
|
||||
|
|
@ -114,7 +114,7 @@ public class ReadClipper {
|
|||
|
||||
// if the entire read should be clipped, then return an empty read. (--todo: maybe null is better? testing this for now)
|
||||
if (leftClipIndex > rightClipIndex)
|
||||
return (new SAMRecord(read.getHeader()));
|
||||
return (new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
if (rightClipIndex < read.getReadLength() - 1) {
|
||||
this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1));
|
||||
|
|
@ -125,7 +125,7 @@ public class ReadClipper {
|
|||
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
||||
public SAMRecord hardClipSoftClippedBases () {
|
||||
public GATKSAMRecord hardClipSoftClippedBases () {
|
||||
int readIndex = 0;
|
||||
int cutLeft = -1; // first position to hard clip (inclusive)
|
||||
int cutRight = -1; // first position to hard clip (inclusive)
|
||||
|
|
@ -164,12 +164,12 @@ public class ReadClipper {
|
|||
* @param algorithm
|
||||
* @return
|
||||
*/
|
||||
public SAMRecord clipRead(ClippingRepresentation algorithm) {
|
||||
public GATKSAMRecord clipRead(ClippingRepresentation algorithm) {
|
||||
if (ops == null)
|
||||
return getRead();
|
||||
else {
|
||||
try {
|
||||
SAMRecord clippedRead = (SAMRecord) read.clone();
|
||||
GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
|
||||
for (ClippingOp op : getOps()) {
|
||||
clippedRead = op.apply(algorithm, clippedRead);
|
||||
}
|
||||
|
|
@ -181,7 +181,7 @@ public class ReadClipper {
|
|||
}
|
||||
}
|
||||
|
||||
public SAMRecord hardClipLeadingInsertions() {
|
||||
public GATKSAMRecord hardClipLeadingInsertions() {
|
||||
for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
|
||||
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.duplicates;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -35,27 +34,28 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class DupUtils {
|
||||
private static SAMRecord tmpCopyRead(SAMRecord read) {
|
||||
private static GATKSAMRecord tmpCopyRead(GATKSAMRecord read) {
|
||||
try {
|
||||
return (SAMRecord)read.clone();
|
||||
return (GATKSAMRecord)read.clone();
|
||||
} catch ( CloneNotSupportedException e ) {
|
||||
throw new ReviewedStingException("Unexpected Clone failure!");
|
||||
}
|
||||
}
|
||||
|
||||
public static SAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int maxQScore) {
|
||||
public static GATKSAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int maxQScore) {
|
||||
if ( duplicates.size() == 0 )
|
||||
return null;
|
||||
|
||||
// make the combined read by copying the first read and setting the
|
||||
// bases and quals to new arrays
|
||||
SAMRecord comb = tmpCopyRead(duplicates.get(0));
|
||||
//SAMRecord comb = tmpCopyRead(duplicates.get(0));
|
||||
GATKSAMRecord comb = tmpCopyRead(duplicates.get(0));
|
||||
//GATKSAMRecord comb = tmpCopyRead(duplicates.get(0));
|
||||
comb.setDuplicateReadFlag(false);
|
||||
int readLen = comb.getReadBases().length;
|
||||
byte[] bases = new byte[readLen];
|
||||
|
|
@ -63,7 +63,7 @@ public class DupUtils {
|
|||
|
||||
for ( int i = 0; i < readLen; i++ ) {
|
||||
//System.out.printf("I is %d%n", i);
|
||||
//for ( SAMRecord read : duplicates ) {
|
||||
//for ( GATKSAMRecord read : duplicates ) {
|
||||
// System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]);
|
||||
//}
|
||||
Pair<Byte, Byte> baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore);
|
||||
|
|
@ -117,7 +117,7 @@ public class DupUtils {
|
|||
System.out.printf("%n");
|
||||
}
|
||||
|
||||
private static Pair<Byte, Byte> combineBaseProbs(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int readOffset, int maxQScore) {
|
||||
private static Pair<Byte, Byte> combineBaseProbs(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int readOffset, int maxQScore) {
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(duplicates.get(0));
|
||||
ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset);
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.fragments;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -35,17 +36,17 @@ public class FragmentUtils {
|
|||
* @param <T>
|
||||
*/
|
||||
public interface ReadGetter<T> {
|
||||
public SAMRecord get(T object);
|
||||
public GATKSAMRecord get(T object);
|
||||
}
|
||||
|
||||
/** Identify getter for SAMRecords themselves */
|
||||
private final static ReadGetter<SAMRecord> SamRecordGetter = new ReadGetter<SAMRecord>() {
|
||||
@Override public SAMRecord get(final SAMRecord object) { return object; }
|
||||
private final static ReadGetter<GATKSAMRecord> SamRecordGetter = new ReadGetter<GATKSAMRecord>() {
|
||||
@Override public GATKSAMRecord get(final GATKSAMRecord object) { return object; }
|
||||
};
|
||||
|
||||
/** Gets the SAMRecord in a PileupElement */
|
||||
private final static ReadGetter<PileupElement> PileupElementGetter = new ReadGetter<PileupElement>() {
|
||||
@Override public SAMRecord get(final PileupElement object) { return object.getRead(); }
|
||||
@Override public GATKSAMRecord get(final PileupElement object) { return object.getRead(); }
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -116,7 +117,7 @@ public class FragmentUtils {
|
|||
return create(rbp, rbp.getNumberOfElements(), PileupElementGetter);
|
||||
}
|
||||
|
||||
public final static FragmentCollection<SAMRecord> create(List<SAMRecord> reads) {
|
||||
public final static FragmentCollection<GATKSAMRecord> create(List<GATKSAMRecord> reads) {
|
||||
return create(reads, reads.size(), SamRecordGetter);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,13 +24,13 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -59,12 +59,12 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* @param reads
|
||||
* @param offsets
|
||||
*/
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets ) {
|
||||
this.loc = loc;
|
||||
this.pileupElementTracker = readsOffsets2Pileup(reads,offsets);
|
||||
}
|
||||
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
|
||||
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, int offset ) {
|
||||
this.loc = loc;
|
||||
this.pileupElementTracker = readsOffsets2Pileup(reads,offset);
|
||||
}
|
||||
|
|
@ -167,7 +167,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* @param offsets
|
||||
* @return
|
||||
*/
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, List<Integer> offsets ) {
|
||||
if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if ( offsets == null ) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup");
|
||||
if ( reads.size() != offsets.size() ) throw new ReviewedStingException("Reads and offset lists have different sizes!");
|
||||
|
|
@ -187,7 +187,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* @param offset
|
||||
* @return
|
||||
*/
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<SAMRecord> reads, int offset ) {
|
||||
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, int offset ) {
|
||||
if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
|
||||
if ( offset < 0 ) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup");
|
||||
|
||||
|
|
@ -200,7 +200,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
|
||||
protected abstract AbstractReadBackedPileup<RBP,PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
|
||||
protected abstract PE createNewPileupElement(SAMRecord read, int offset);
|
||||
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset);
|
||||
|
||||
// --------------------------------------------------------
|
||||
//
|
||||
|
|
@ -512,7 +512,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
SAMRecord read = p.getRead();
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(targetReadGroupId != null) {
|
||||
if(read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId()))
|
||||
filteredTracker.add(p);
|
||||
|
|
@ -543,7 +543,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
SAMRecord read = p.getRead();
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(laneID != null) {
|
||||
if(read.getReadGroup() != null &&
|
||||
(read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different
|
||||
|
|
@ -567,7 +567,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
else {
|
||||
Collection<String> sampleNames = new HashSet<String>();
|
||||
for(PileupElement p: this) {
|
||||
SAMRecord read = p.getRead();
|
||||
GATKSAMRecord read = p.getRead();
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
sampleNames.add(sampleName);
|
||||
}
|
||||
|
|
@ -644,7 +644,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
HashSet<String> hashSampleNames = new HashSet<String>(sampleNames); // to speed up the "contains" access in the for loop
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
SAMRecord read = p.getRead();
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else.
|
||||
if(read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample()))
|
||||
filteredTracker.add(p);
|
||||
|
|
@ -669,7 +669,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
else {
|
||||
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||
for(PE p: pileupElementTracker) {
|
||||
SAMRecord read = p.getRead();
|
||||
GATKSAMRecord read = p.getRead();
|
||||
if(sampleName != null) {
|
||||
if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
|
||||
filteredTracker.add(p);
|
||||
|
|
@ -824,8 +824,8 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(getNumberOfElements());
|
||||
public List<GATKSAMRecord> getReads() {
|
||||
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(getNumberOfElements());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,131 +1,132 @@
|
|||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* In the "standard" locus traversal mode,
|
||||
* the traversal is performed striclty over the reference bases. Thus, only pileups of bases (and hence local events
|
||||
* such as point mutations) are "seen" at every invocation of the walker's map() function at every (genomic) locus. Deletions
|
||||
* are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted
|
||||
* in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept.
|
||||
* The insertions that may be present in some reads are not seen at all in such strict reference traversal mode.
|
||||
*
|
||||
* By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e.
|
||||
* last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is
|
||||
* an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice:
|
||||
* first call will be performed in a "standard" mode, with a pileup of bases over the position Z, and then the additional
|
||||
* call will be made at the same position with a pileup of event/noevent calls, where events are extended and contain
|
||||
* full information about insertions/deletions. Then the next, "standard", call to map() will be performed at the next
|
||||
* (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at
|
||||
* Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed
|
||||
* only once, at the position where the indel maps (starts).
|
||||
*
|
||||
* This class wraps an "extended" event (indel) so that in can be added to a pileup of events at a given location.
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Dec 21, 2009
|
||||
* Time: 2:57:55 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ExtendedEventPileupElement extends PileupElement {
|
||||
public enum Type {
|
||||
NOEVENT, DELETION, INSERTION
|
||||
}
|
||||
|
||||
private Type type = null;
|
||||
private int eventLength = -1;
|
||||
private String eventBases = null; // if it is a deletion, we do not have information about the actual deleted bases
|
||||
// in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
|
||||
private SAMRecord read;
|
||||
private int offset; // position in the read immediately BEFORE the event
|
||||
// This is broken! offset is always zero because these member variables are shadowed by base class
|
||||
|
||||
/** Constructor for extended pileup element (indel).
|
||||
*
|
||||
* @param read the read, in which the indel is observed
|
||||
* @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion)
|
||||
* @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT)
|
||||
* @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent)
|
||||
*/
|
||||
public ExtendedEventPileupElement( SAMRecord read, int offset, int length, byte[] eventBases ) {
|
||||
super(read, offset);
|
||||
this.eventLength = length;
|
||||
if ( length <= 0 ) type = Type.NOEVENT;
|
||||
else {
|
||||
if ( eventBases != null ) {
|
||||
this.eventBases = new String(eventBases).toUpperCase();
|
||||
type = Type.INSERTION;
|
||||
} else {
|
||||
type = Type.DELETION;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Constructor for deletion or noevent calls - does not take event bases as an argument (as those should
|
||||
* be null or are ignored in these cases anyway)
|
||||
* @param read
|
||||
* @param offset
|
||||
* @param length
|
||||
*/
|
||||
public ExtendedEventPileupElement( SAMRecord read, int offset, int length ) {
|
||||
this(read,offset, length, null);
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return type == Type.DELETION;
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return type == Type.INSERTION;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return isDeletion() || isInsertion();
|
||||
}
|
||||
|
||||
public Type getType() { return type; }
|
||||
|
||||
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
|
||||
// a valid base quality. The following code attempts to compensate for that.'
|
||||
|
||||
@Override
|
||||
public byte getBase() {
|
||||
return getBase(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBaseIndex() {
|
||||
return getBaseIndex(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getQual() {
|
||||
return getQual(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
/** Returns length of the event (number of inserted or deleted bases */
|
||||
public int getEventLength() { return eventLength; }
|
||||
|
||||
/** Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
|
||||
* */
|
||||
public String getEventBases() { return eventBases; }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
char c = '.';
|
||||
String fillStr = null ;
|
||||
if ( isDeletion() ) {
|
||||
c = '-';
|
||||
char [] filler = new char[eventLength];
|
||||
Arrays.fill(filler, 'D');
|
||||
fillStr = new String(filler);
|
||||
}
|
||||
else if ( isInsertion() ) c = '+';
|
||||
return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()?
|
||||
(isInsertion() ? eventBases : fillStr ): "", getMappingQual());
|
||||
}
|
||||
|
||||
}
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* In the "standard" locus traversal mode,
|
||||
* the traversal is performed striclty over the reference bases. Thus, only pileups of bases (and hence local events
|
||||
* such as point mutations) are "seen" at every invocation of the walker's map() function at every (genomic) locus. Deletions
|
||||
* are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted
|
||||
* in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept.
|
||||
* The insertions that may be present in some reads are not seen at all in such strict reference traversal mode.
|
||||
*
|
||||
* By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e.
|
||||
* last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is
|
||||
* an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice:
|
||||
* first call will be performed in a "standard" mode, with a pileup of bases over the position Z, and then the additional
|
||||
* call will be made at the same position with a pileup of event/noevent calls, where events are extended and contain
|
||||
* full information about insertions/deletions. Then the next, "standard", call to map() will be performed at the next
|
||||
* (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at
|
||||
* Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed
|
||||
* only once, at the position where the indel maps (starts).
|
||||
*
|
||||
* This class wraps an "extended" event (indel) so that in can be added to a pileup of events at a given location.
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Dec 21, 2009
|
||||
* Time: 2:57:55 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ExtendedEventPileupElement extends PileupElement {
|
||||
public enum Type {
|
||||
NOEVENT, DELETION, INSERTION
|
||||
}
|
||||
|
||||
private Type type = null;
|
||||
private int eventLength = -1;
|
||||
private String eventBases = null; // if it is a deletion, we do not have information about the actual deleted bases
|
||||
// in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
|
||||
private SAMRecord read;
|
||||
private int offset; // position in the read immediately BEFORE the event
|
||||
// This is broken! offset is always zero because these member variables are shadowed by base class
|
||||
|
||||
/** Constructor for extended pileup element (indel).
|
||||
*
|
||||
* @param read the read, in which the indel is observed
|
||||
* @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion)
|
||||
* @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT)
|
||||
* @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent)
|
||||
*/
|
||||
public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length, byte[] eventBases ) {
|
||||
super(read, offset);
|
||||
this.eventLength = length;
|
||||
if ( length <= 0 ) type = Type.NOEVENT;
|
||||
else {
|
||||
if ( eventBases != null ) {
|
||||
this.eventBases = new String(eventBases).toUpperCase();
|
||||
type = Type.INSERTION;
|
||||
} else {
|
||||
type = Type.DELETION;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Constructor for deletion or noevent calls - does not take event bases as an argument (as those should
|
||||
* be null or are ignored in these cases anyway)
|
||||
* @param read
|
||||
* @param offset
|
||||
* @param length
|
||||
*/
|
||||
public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length ) {
|
||||
this(read,offset, length, null);
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return type == Type.DELETION;
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return type == Type.INSERTION;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return isDeletion() || isInsertion();
|
||||
}
|
||||
|
||||
public Type getType() { return type; }
|
||||
|
||||
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
|
||||
// a valid base quality. The following code attempts to compensate for that.'
|
||||
|
||||
@Override
|
||||
public byte getBase() {
|
||||
return getBase(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBaseIndex() {
|
||||
return getBaseIndex(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getQual() {
|
||||
return getQual(offset >= 0 ? offset : offset+eventLength);
|
||||
}
|
||||
|
||||
/** Returns length of the event (number of inserted or deleted bases */
|
||||
public int getEventLength() { return eventLength; }
|
||||
|
||||
/** Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
|
||||
* */
|
||||
public String getEventBases() { return eventBases; }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
char c = '.';
|
||||
String fillStr = null ;
|
||||
if ( isDeletion() ) {
|
||||
c = '-';
|
||||
char [] filler = new char[eventLength];
|
||||
Arrays.fill(filler, 'D');
|
||||
fillStr = new String(filler);
|
||||
}
|
||||
else if ( isInsertion() ) c = '+';
|
||||
return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()?
|
||||
(isInsertion() ? eventBases : fillStr ): "", getMappingQual());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,10 +2,8 @@ package org.broadinstitute.sting.utils.pileup;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -21,14 +19,14 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
public static final byte T_FOLLOWED_BY_INSERTION_BASE = (byte) 89;
|
||||
public static final byte G_FOLLOWED_BY_INSERTION_BASE = (byte) 90;
|
||||
|
||||
protected final SAMRecord read;
|
||||
protected final GATKSAMRecord read;
|
||||
protected final int offset;
|
||||
|
||||
@Requires({
|
||||
"read != null",
|
||||
"offset >= -1",
|
||||
"offset <= read.getReadLength()"})
|
||||
public PileupElement( SAMRecord read, int offset ) {
|
||||
public PileupElement( GATKSAMRecord read, int offset ) {
|
||||
this.read = read;
|
||||
this.offset = offset;
|
||||
}
|
||||
|
|
@ -38,7 +36,7 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public SAMRecord getRead() { return read; }
|
||||
public GATKSAMRecord getRead() { return read; }
|
||||
|
||||
@Ensures("result == offset")
|
||||
public int getOffset() { return offset; }
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.pileup;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
|
@ -166,7 +167,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
|
|||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads();
|
||||
public List<GATKSAMRecord> getReads();
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
|
|
|
|||
|
|
@ -23,10 +23,10 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -95,7 +95,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
}
|
||||
|
||||
@Override
|
||||
protected ExtendedEventPileupElement createNewPileupElement(SAMRecord read, int offset) {
|
||||
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
|
||||
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
|
@ -202,7 +202,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
|
|||
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
* @return
|
||||
*/
|
||||
public List<SAMRecord> getReads();
|
||||
public List<GATKSAMRecord> getReads();
|
||||
|
||||
/**
|
||||
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
||||
|
|
|
|||
|
|
@ -23,8 +23,8 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.utils.pileup;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
|
@ -35,11 +35,11 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
|
|||
super(loc);
|
||||
}
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets ) {
|
||||
super(loc,reads,offsets);
|
||||
}
|
||||
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
|
||||
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, int offset ) {
|
||||
super(loc,reads,offset);
|
||||
}
|
||||
|
||||
|
|
@ -70,7 +70,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
|
|||
}
|
||||
|
||||
@Override
|
||||
protected PileupElement createNewPileupElement(SAMRecord read, int offset) {
|
||||
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
|
||||
return new PileupElement(read,offset);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,9 +104,9 @@ public class ArtificialReadsTraversal<M,T> extends TraversalEngine<M,T,Walker<M,
|
|||
// an array of characters that represent the reference
|
||||
ReferenceContext refSeq = null;
|
||||
|
||||
final boolean keepMeP = readWalker.filter(refSeq, read);
|
||||
final boolean keepMeP = readWalker.filter(refSeq, (GATKSAMRecord) read);
|
||||
if (keepMeP) {
|
||||
M x = readWalker.map(refSeq, read, null); // TODO: fix me at some point, it would be nice to fake out ROD data too
|
||||
M x = readWalker.map(refSeq, (GATKSAMRecord) read, null); // TODO: fix me at some point, it would be nice to fake out ROD data too
|
||||
sum = readWalker.reduce(x, sum);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ package org.broadinstitute.sting.utils.sam;
|
|||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -201,9 +200,9 @@ public class ArtificialSAMUtils {
|
|||
return rec;
|
||||
}
|
||||
|
||||
public final static List<SAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||
SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
||||
SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
||||
public final static List<GATKSAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||
GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
||||
GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
||||
|
||||
left.setReadPairedFlag(true);
|
||||
right.setReadPairedFlag(true);
|
||||
|
|
@ -327,9 +326,9 @@ public class ArtificialSAMUtils {
|
|||
|
||||
if ( rightStart <= 0 ) continue;
|
||||
|
||||
List<SAMRecord> pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||
final SAMRecord left = pair.get(0);
|
||||
final SAMRecord right = pair.get(1);
|
||||
List<GATKSAMRecord> pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||
final GATKSAMRecord left = pair.get(0);
|
||||
final GATKSAMRecord right = pair.get(1);
|
||||
|
||||
pileupElements.add(new PileupElement(left, pos - leftStart));
|
||||
|
||||
|
|
|
|||
|
|
@ -188,15 +188,15 @@ public class ReadUtils {
|
|||
* This makes the following code a little nasty, since we can only detect if a base is in the adaptor, but not
|
||||
* if it overlaps the read.
|
||||
*
|
||||
* @param rec
|
||||
* @param read
|
||||
* @param basePos
|
||||
* @param adaptorLength
|
||||
* @return
|
||||
*/
|
||||
public static OverlapType readPairBaseOverlapType(final SAMRecord rec, long basePos, final int adaptorLength) {
|
||||
public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos, final int adaptorLength) {
|
||||
OverlapType state = OverlapType.NOT_OVERLAPPING;
|
||||
|
||||
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength);
|
||||
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
|
||||
|
||||
if ( adaptorBoundaries != null ) { // we're not an unmapped pair -- cannot filter out
|
||||
|
||||
|
|
@ -205,28 +205,28 @@ public class ReadUtils {
|
|||
if ( inAdapator ) {
|
||||
state = OverlapType.IN_ADAPTOR;
|
||||
//System.out.printf("baseOverlapState: %50s negStrand=%b base=%d start=%d stop=%d, adaptorStart=%d adaptorEnd=%d isize=%d => %s%n",
|
||||
// rec.getReadName(), rec.getReadNegativeStrandFlag(), basePos, rec.getAlignmentStart(), rec.getAlignmentEnd(), adaptorBoundaries.first, adaptorBoundaries.second, rec.getInferredInsertSize(), state);
|
||||
// read.getReadName(), read.getReadNegativeStrandFlag(), basePos, read.getAlignmentStart(), read.getAlignmentEnd(), adaptorBoundaries.first, adaptorBoundaries.second, read.getInferredInsertSize(), state);
|
||||
}
|
||||
}
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord rec, int adaptorLength) {
|
||||
int isize = rec.getInferredInsertSize();
|
||||
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord read, int adaptorLength) {
|
||||
int isize = read.getInferredInsertSize();
|
||||
if ( isize == 0 )
|
||||
return null; // don't worry about unmapped pairs
|
||||
|
||||
int adaptorStart, adaptorEnd;
|
||||
|
||||
if ( rec.getReadNegativeStrandFlag() ) {
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
// we are on the negative strand, so our mate is on the positive strand
|
||||
int mateStart = rec.getMateAlignmentStart();
|
||||
int mateStart = read.getMateAlignmentStart();
|
||||
adaptorStart = mateStart - adaptorLength - 1;
|
||||
adaptorEnd = mateStart - 1;
|
||||
} else {
|
||||
// we are on the positive strand, so our mate is on the negative strand
|
||||
int mateEnd = rec.getAlignmentStart() + isize - 1;
|
||||
int mateEnd = read.getAlignmentStart() + isize - 1;
|
||||
adaptorStart = mateEnd + 1;
|
||||
adaptorEnd = mateEnd + adaptorLength;
|
||||
}
|
||||
|
|
@ -236,47 +236,47 @@ public class ReadUtils {
|
|||
|
||||
/**
|
||||
*
|
||||
* @param rec original SAM record
|
||||
* @param read original SAM record
|
||||
* @param adaptorLength length of adaptor sequence
|
||||
* @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped
|
||||
*/
|
||||
public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) {
|
||||
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) {
|
||||
|
||||
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength);
|
||||
GATKSAMRecord result = (GATKSAMRecord)rec;
|
||||
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
|
||||
GATKSAMRecord result = (GATKSAMRecord)read;
|
||||
|
||||
if ( adaptorBoundaries != null ) {
|
||||
if ( rec.getReadNegativeStrandFlag() && adaptorBoundaries.second >= rec.getAlignmentStart() && adaptorBoundaries.first < rec.getAlignmentEnd() )
|
||||
result = hardClipStartOfRead(rec, adaptorBoundaries.second);
|
||||
else if ( !rec.getReadNegativeStrandFlag() && adaptorBoundaries.first <= rec.getAlignmentEnd() )
|
||||
result = hardClipEndOfRead(rec, adaptorBoundaries.first);
|
||||
if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() )
|
||||
result = hardClipStartOfRead(read, adaptorBoundaries.second);
|
||||
else if ( !read.getReadNegativeStrandFlag() && adaptorBoundaries.first <= read.getAlignmentEnd() )
|
||||
result = hardClipEndOfRead(read, adaptorBoundaries.first);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// return true if the read needs to be completely clipped
|
||||
private static GATKSAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) {
|
||||
private static GATKSAMRecord hardClipStartOfRead(GATKSAMRecord oldRec, int stopPosition) {
|
||||
|
||||
if ( stopPosition >= oldRec.getAlignmentEnd() ) {
|
||||
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
|
||||
//System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName());
|
||||
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
|
||||
return null;
|
||||
}
|
||||
|
||||
GATKSAMRecord rec;
|
||||
GATKSAMRecord read;
|
||||
try {
|
||||
rec = (GATKSAMRecord)oldRec.clone();
|
||||
read = (GATKSAMRecord)oldRec.clone();
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
//System.out.printf("Clipping start of read: %50s start=%d adaptorEnd=%d isize=%d %n",
|
||||
// rec.getReadName(), rec.getAlignmentStart(), stopPosition, rec.getInferredInsertSize());
|
||||
// read.getReadName(), read.getAlignmentStart(), stopPosition, read.getInferredInsertSize());
|
||||
|
||||
Cigar oldCigar = rec.getCigar();
|
||||
Cigar oldCigar = read.getCigar();
|
||||
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
|
||||
int currentPos = rec.getAlignmentStart();
|
||||
int currentPos = read.getAlignmentStart();
|
||||
int basesToClip = 0;
|
||||
int basesAlreadyClipped = 0;
|
||||
|
||||
|
|
@ -315,48 +315,48 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
// copy over the unclipped bases
|
||||
final byte[] bases = rec.getReadBases();
|
||||
final byte[] quals = rec.getBaseQualities();
|
||||
final byte[] bases = read.getReadBases();
|
||||
final byte[] quals = read.getBaseQualities();
|
||||
int newLength = bases.length - basesToClip;
|
||||
byte[] newBases = new byte[newLength];
|
||||
byte[] newQuals = new byte[newLength];
|
||||
System.arraycopy(bases, basesToClip, newBases, 0, newLength);
|
||||
System.arraycopy(quals, basesToClip, newQuals, 0, newLength);
|
||||
rec.setReadBases(newBases);
|
||||
rec.setBaseQualities(newQuals);
|
||||
read.setReadBases(newBases);
|
||||
read.setBaseQualities(newQuals);
|
||||
|
||||
// now add a CIGAR element for the clipped bases
|
||||
newCigarElements.addFirst(new CigarElement(basesToClip + basesAlreadyClipped, CigarOperator.H));
|
||||
Cigar newCigar = new Cigar(newCigarElements);
|
||||
rec.setCigar(newCigar);
|
||||
read.setCigar(newCigar);
|
||||
|
||||
// adjust the start accordingly
|
||||
rec.setAlignmentStart(stopPosition + 1);
|
||||
read.setAlignmentStart(stopPosition + 1);
|
||||
|
||||
return rec;
|
||||
return read;
|
||||
}
|
||||
|
||||
private static GATKSAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) {
|
||||
private static GATKSAMRecord hardClipEndOfRead(GATKSAMRecord oldRec, int startPosition) {
|
||||
|
||||
if ( startPosition <= oldRec.getAlignmentStart() ) {
|
||||
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
|
||||
//System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName());
|
||||
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
|
||||
return null;
|
||||
}
|
||||
|
||||
GATKSAMRecord rec;
|
||||
GATKSAMRecord read;
|
||||
try {
|
||||
rec = (GATKSAMRecord)oldRec.clone();
|
||||
read = (GATKSAMRecord)oldRec.clone();
|
||||
} catch (Exception e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
//System.out.printf("Clipping end of read: %50s adaptorStart=%d end=%d isize=%d %n",
|
||||
// rec.getReadName(), startPosition, rec.getAlignmentEnd(), rec.getInferredInsertSize());
|
||||
// read.getReadName(), startPosition, read.getAlignmentEnd(), read.getInferredInsertSize());
|
||||
|
||||
Cigar oldCigar = rec.getCigar();
|
||||
Cigar oldCigar = read.getCigar();
|
||||
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
|
||||
int currentPos = rec.getAlignmentStart();
|
||||
int currentPos = read.getAlignmentStart();
|
||||
int basesToKeep = 0;
|
||||
int basesAlreadyClipped = 0;
|
||||
|
||||
|
|
@ -402,41 +402,41 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
// copy over the unclipped bases
|
||||
final byte[] bases = rec.getReadBases();
|
||||
final byte[] quals = rec.getBaseQualities();
|
||||
final byte[] bases = read.getReadBases();
|
||||
final byte[] quals = read.getBaseQualities();
|
||||
byte[] newBases = new byte[basesToKeep];
|
||||
byte[] newQuals = new byte[basesToKeep];
|
||||
System.arraycopy(bases, 0, newBases, 0, basesToKeep);
|
||||
System.arraycopy(quals, 0, newQuals, 0, basesToKeep);
|
||||
rec.setReadBases(newBases);
|
||||
rec.setBaseQualities(newQuals);
|
||||
read.setReadBases(newBases);
|
||||
read.setBaseQualities(newQuals);
|
||||
|
||||
// now add a CIGAR element for the clipped bases
|
||||
newCigarElements.add(new CigarElement((bases.length - basesToKeep) + basesAlreadyClipped, CigarOperator.H));
|
||||
Cigar newCigar = new Cigar(newCigarElements);
|
||||
rec.setCigar(newCigar);
|
||||
read.setCigar(newCigar);
|
||||
|
||||
// adjust the stop accordingly
|
||||
// rec.setAlignmentEnd(startPosition - 1);
|
||||
// read.setAlignmentEnd(startPosition - 1);
|
||||
|
||||
return rec;
|
||||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hard clips away (i.e.g, removes from the read) bases that were previously soft clipped.
|
||||
*
|
||||
* @param rec
|
||||
* @param read
|
||||
* @return
|
||||
*/
|
||||
@Requires("rec != null")
|
||||
@Requires("read != null")
|
||||
@Ensures("result != null")
|
||||
public static SAMRecord hardClipSoftClippedBases(SAMRecord rec) {
|
||||
List<CigarElement> cigarElts = rec.getCigar().getCigarElements();
|
||||
public static GATKSAMRecord hardClipSoftClippedBases(GATKSAMRecord read) {
|
||||
List<CigarElement> cigarElts = read.getCigar().getCigarElements();
|
||||
|
||||
if ( cigarElts.size() == 1 ) // can't be soft clipped, just return
|
||||
return rec;
|
||||
return read;
|
||||
|
||||
int keepStart = 0, keepEnd = rec.getReadLength() - 1;
|
||||
int keepStart = 0, keepEnd = read.getReadLength() - 1;
|
||||
List<CigarElement> newCigarElements = new LinkedList<CigarElement>();
|
||||
|
||||
for ( int i = 0; i < cigarElts.size(); i++ ) {
|
||||
|
|
@ -447,7 +447,7 @@ public class ReadUtils {
|
|||
if ( i == 0 )
|
||||
keepStart = l;
|
||||
else
|
||||
keepEnd = rec.getReadLength() - l - 1;
|
||||
keepEnd = read.getReadLength() - l - 1;
|
||||
newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP));
|
||||
break;
|
||||
|
||||
|
|
@ -477,54 +477,54 @@ public class ReadUtils {
|
|||
}
|
||||
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
|
||||
|
||||
return hardClipBases(rec, keepStart, keepEnd, mergedCigarElements);
|
||||
return hardClipBases(read, keepStart, keepEnd, mergedCigarElements);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hard clips out the bases in rec, keeping the bases from keepStart to keepEnd, inclusive. Note these
|
||||
* Hard clips out the bases in read, keeping the bases from keepStart to keepEnd, inclusive. Note these
|
||||
* are offsets, so they are 0 based
|
||||
*
|
||||
* @param rec
|
||||
* @param read
|
||||
* @param keepStart
|
||||
* @param keepEnd
|
||||
* @param newCigarElements
|
||||
* @return
|
||||
*/
|
||||
@Requires({
|
||||
"rec != null",
|
||||
"read != null",
|
||||
"keepStart >= 0",
|
||||
"keepEnd < rec.getReadLength()",
|
||||
"rec.getReadUnmappedFlag() || newCigarElements != null"})
|
||||
"keepEnd < read.getReadLength()",
|
||||
"read.getReadUnmappedFlag() || newCigarElements != null"})
|
||||
@Ensures("result != null")
|
||||
public static SAMRecord hardClipBases(SAMRecord rec, int keepStart, int keepEnd, List<CigarElement> newCigarElements) {
|
||||
public static GATKSAMRecord hardClipBases(GATKSAMRecord read, int keepStart, int keepEnd, List<CigarElement> newCigarElements) {
|
||||
int newLength = keepEnd - keepStart + 1;
|
||||
if ( newLength != rec.getReadLength() ) {
|
||||
if ( newLength != read.getReadLength() ) {
|
||||
try {
|
||||
rec = SimplifyingSAMFileWriter.simplifyRead((SAMRecord)rec.clone());
|
||||
read = (GATKSAMRecord)read.clone();
|
||||
// copy over the unclipped bases
|
||||
final byte[] bases = rec.getReadBases();
|
||||
final byte[] quals = rec.getBaseQualities();
|
||||
final byte[] bases = read.getReadBases();
|
||||
final byte[] quals = read.getBaseQualities();
|
||||
byte[] newBases = new byte[newLength];
|
||||
byte[] newQuals = new byte[newLength];
|
||||
System.arraycopy(bases, keepStart, newBases, 0, newLength);
|
||||
System.arraycopy(quals, keepStart, newQuals, 0, newLength);
|
||||
rec.setReadBases(newBases);
|
||||
rec.setBaseQualities(newQuals);
|
||||
read.setReadBases(newBases);
|
||||
read.setBaseQualities(newQuals);
|
||||
|
||||
// now add a CIGAR element for the clipped bases, if the read isn't unmapped
|
||||
if ( ! rec.getReadUnmappedFlag() ) {
|
||||
if ( ! read.getReadUnmappedFlag() ) {
|
||||
Cigar newCigar = new Cigar(newCigarElements);
|
||||
rec.setCigar(newCigar);
|
||||
read.setCigar(newCigar);
|
||||
}
|
||||
} catch ( CloneNotSupportedException e ) {
|
||||
throw new ReviewedStingException("WTF, where did clone go?", e);
|
||||
}
|
||||
}
|
||||
|
||||
return rec;
|
||||
return read;
|
||||
}
|
||||
|
||||
public static SAMRecord replaceSoftClipsWithMatches(SAMRecord read) {
|
||||
public static GATKSAMRecord replaceSoftClipsWithMatches(GATKSAMRecord read) {
|
||||
List<CigarElement> newCigarElements = new ArrayList<CigarElement>();
|
||||
|
||||
for ( CigarElement ce : read.getCigar().getCigarElements() ) {
|
||||
|
|
@ -561,15 +561,15 @@ public class ReadUtils {
|
|||
|
||||
/**
|
||||
*
|
||||
* @param rec original SAM record
|
||||
* @param read original SAM record
|
||||
* @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped
|
||||
*/
|
||||
public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec) {
|
||||
return hardClipAdaptorSequence(rec, DEFAULT_ADAPTOR_SIZE);
|
||||
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read) {
|
||||
return hardClipAdaptorSequence(read, DEFAULT_ADAPTOR_SIZE);
|
||||
}
|
||||
|
||||
public static OverlapType readPairBaseOverlapType(final SAMRecord rec, long basePos) {
|
||||
return readPairBaseOverlapType(rec, basePos, DEFAULT_ADAPTOR_SIZE);
|
||||
public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos) {
|
||||
return readPairBaseOverlapType(read, basePos, DEFAULT_ADAPTOR_SIZE);
|
||||
}
|
||||
|
||||
public static boolean is454Read(SAMRecord read) {
|
||||
|
|
@ -601,10 +601,10 @@ public class ReadUtils {
|
|||
readFlagNames.put(0x400, "Duplicate");
|
||||
}
|
||||
|
||||
public static String readFlagsAsString(SAMRecord rec) {
|
||||
public static String readFlagsAsString(GATKSAMRecord read) {
|
||||
String flags = "";
|
||||
for (int flag : readFlagNames.keySet()) {
|
||||
if ((rec.getFlags() & flag) != 0) {
|
||||
if ((read.getFlags() & flag) != 0) {
|
||||
flags += readFlagNames.get(flag) + " ";
|
||||
}
|
||||
}
|
||||
|
|
@ -618,7 +618,7 @@ public class ReadUtils {
|
|||
* @param reads
|
||||
* @return
|
||||
*/
|
||||
public final static List<SAMRecord> coordinateSortReads(List<SAMRecord> reads) {
|
||||
public final static List<GATKSAMRecord> coordinateSortReads(List<GATKSAMRecord> reads) {
|
||||
final SAMRecordComparator comparer = new SAMRecordCoordinateComparator();
|
||||
Collections.sort(reads, comparer);
|
||||
return reads;
|
||||
|
|
@ -647,7 +647,7 @@ public class ReadUtils {
|
|||
* @param interval the interval
|
||||
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
|
||||
*/
|
||||
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
||||
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(GATKSAMRecord read, GenomeLoc interval) {
|
||||
|
||||
int sStart = getRefCoordSoftUnclippedStart(read);
|
||||
int sStop = getRefCoordSoftUnclippedEnd(read);
|
||||
|
|
@ -685,7 +685,7 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
||||
public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) {
|
||||
int start = read.getUnclippedStart();
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||
|
|
@ -697,7 +697,7 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
||||
public static int getRefCoordSoftUnclippedEnd(GATKSAMRecord read) {
|
||||
int stop = read.getUnclippedStart();
|
||||
|
||||
if (readIsEntirelyInsertion(read))
|
||||
|
|
@ -716,7 +716,7 @@ public class ReadUtils {
|
|||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
private static boolean readIsEntirelyInsertion(SAMRecord read) {
|
||||
private static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
|
||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() != CigarOperator.INSERTION)
|
||||
return false;
|
||||
|
|
@ -730,7 +730,7 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
|
||||
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
|
||||
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
|
||||
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
|
||||
* deletion.
|
||||
|
|
@ -742,7 +742,7 @@ public class ReadUtils {
|
|||
*/
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
|
||||
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
|
||||
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
||||
int readCoord = result.getFirst();
|
||||
|
||||
|
|
@ -760,7 +760,7 @@ public class ReadUtils {
|
|||
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
||||
* a deletion.
|
||||
*
|
||||
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
|
||||
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
|
||||
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
||||
* behavior to your needs.
|
||||
*
|
||||
|
|
@ -770,7 +770,7 @@ public class ReadUtils {
|
|||
*/
|
||||
@Requires({"refCoord >= getRefCoordSoftUnclippedStart(read)", "refCoord <= getRefCoordSoftUnclippedEnd(read)"})
|
||||
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
|
||||
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) {
|
||||
int readBases = 0;
|
||||
int refBases = 0;
|
||||
boolean fallsInsideDeletion = false;
|
||||
|
|
@ -851,13 +851,13 @@ public class ReadUtils {
|
|||
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||
}
|
||||
|
||||
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
|
||||
int newReadStart = rec.getAlignmentStart();
|
||||
int newReadEnd = rec.getAlignmentEnd();
|
||||
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
|
||||
public static GATKSAMRecord unclipSoftClippedBases(GATKSAMRecord read) {
|
||||
int newReadStart = read.getAlignmentStart();
|
||||
int newReadEnd = read.getAlignmentEnd();
|
||||
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(read.getCigar().getCigarElements().size());
|
||||
int heldOver = -1;
|
||||
boolean sSeen = false;
|
||||
for ( CigarElement e : rec.getCigar().getCigarElements() ) {
|
||||
for ( CigarElement e : read.getCigar().getCigarElements() ) {
|
||||
if ( e.getOperator().equals(CigarOperator.S) ) {
|
||||
newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M));
|
||||
if ( sSeen ) {
|
||||
|
|
@ -872,7 +872,7 @@ public class ReadUtils {
|
|||
}
|
||||
// merge duplicate operators together
|
||||
int idx = 0;
|
||||
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
|
||||
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(read.getCigar().getCigarElements().size());
|
||||
while ( idx < newCigarElements.size() -1 ) {
|
||||
if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) {
|
||||
int combSize = newCigarElements.get(idx).getLength();
|
||||
|
|
@ -889,10 +889,10 @@ public class ReadUtils {
|
|||
idx++;
|
||||
}
|
||||
|
||||
rec.setCigar(new Cigar(finalCigarElements));
|
||||
rec.setAlignmentStart(newReadStart);
|
||||
read.setCigar(new Cigar(finalCigarElements));
|
||||
read.setAlignmentStart(newReadStart);
|
||||
|
||||
return rec;
|
||||
return read;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -905,7 +905,7 @@ public class ReadUtils {
|
|||
|
||||
@Requires({"read1 != null", "read2 != null"})
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
|
||||
public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||
AlignmentStartComparator comp = new AlignmentStartComparator();
|
||||
return comp.compare(read1, read2);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
/**
|
||||
|
|
@ -38,7 +37,7 @@ public class AllLocusViewUnitTest extends LocusViewTemplate {
|
|||
* @param reads
|
||||
*/
|
||||
@Override
|
||||
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
|
||||
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<GATKSAMRecord> reads ) {
|
||||
AllLocusView allLocusView = (AllLocusView)view;
|
||||
|
||||
// TODO: Should skip over loci not in the given range.
|
||||
|
|
@ -52,7 +51,7 @@ public class AllLocusViewUnitTest extends LocusViewTemplate {
|
|||
Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect");
|
||||
int expectedReadsAtSite = 0;
|
||||
|
||||
for( SAMRecord read: reads ) {
|
||||
for( GATKSAMRecord read: reads ) {
|
||||
if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) {
|
||||
Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads");
|
||||
expectedReadsAtSite++;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
/**
|
||||
|
|
@ -41,7 +41,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate {
|
|||
* @param reads
|
||||
*/
|
||||
@Override
|
||||
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
|
||||
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<GATKSAMRecord> reads ) {
|
||||
CoveredLocusView coveredLocusView = (CoveredLocusView)view;
|
||||
|
||||
// TODO: Should skip over loci not in the given range.
|
||||
|
|
@ -53,7 +53,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate {
|
|||
GenomeLoc site = genomeLocParser.createGenomeLoc("chr1",i);
|
||||
|
||||
int expectedReadsAtSite = 0;
|
||||
for( SAMRecord read: reads ) {
|
||||
for( GATKSAMRecord read: reads ) {
|
||||
if( genomeLocParser.createGenomeLoc(read).containsP(site) )
|
||||
expectedReadsAtSite++;
|
||||
}
|
||||
|
|
@ -67,7 +67,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate {
|
|||
Assert.assertEquals(locusContext.getLocation(), site, "Target locus context location is incorrect");
|
||||
Assert.assertEquals(locusContext.getReads().size(), expectedReadsAtSite, "Found wrong number of reads at site");
|
||||
|
||||
for( SAMRecord read: reads ) {
|
||||
for( GATKSAMRecord read: reads ) {
|
||||
if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation()))
|
||||
Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
|||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -55,12 +56,12 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.<SAMRecord>emptyList());
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.<GATKSAMRecord>emptyList());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void singleReadTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 1, 5);
|
||||
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
|
||||
|
|
@ -76,7 +77,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void readCoveringFirstPartTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 1, 5);
|
||||
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -90,7 +91,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void readCoveringLastPartTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 6, 10);
|
||||
GATKSAMRecord read = buildSAMRecord("read1","chr1", 6, 10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -104,7 +105,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void readCoveringMiddleTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 3, 7);
|
||||
GATKSAMRecord read = buildSAMRecord("read1","chr1", 3, 7);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -118,7 +119,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void readAndLocusOverlapAtLastBase() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 1, 5);
|
||||
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));
|
||||
|
|
@ -132,7 +133,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void readOverlappingStartTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 1, 10);
|
||||
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
|
||||
|
|
@ -146,7 +147,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void readOverlappingEndTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 6, 15);
|
||||
GATKSAMRecord read = buildSAMRecord("read1","chr1", 6, 15);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -160,8 +161,8 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void readsSpanningTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
|
||||
SAMRecord read2 = buildSAMRecord("chr1", 6, 10);
|
||||
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5);
|
||||
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 6, 10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -170,17 +171,17 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void duplicateReadsTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
|
||||
SAMRecord read2 = buildSAMRecord("chr1", 1, 5);
|
||||
SAMRecord read3 = buildSAMRecord("chr1", 6, 10);
|
||||
SAMRecord read4 = buildSAMRecord("chr1", 6, 10);
|
||||
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5);
|
||||
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 1, 5);
|
||||
GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 6, 10);
|
||||
GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 6, 10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -189,17 +190,17 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2, read3, read4);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cascadingReadsWithinBoundsTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1", 2, 6);
|
||||
SAMRecord read2 = buildSAMRecord("chr1", 3, 7);
|
||||
SAMRecord read3 = buildSAMRecord("chr1", 4, 8);
|
||||
SAMRecord read4 = buildSAMRecord("chr1", 5, 9);
|
||||
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 2, 6);
|
||||
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 3, 7);
|
||||
GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 4, 8);
|
||||
GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 5, 9);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -208,19 +209,19 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2, read3, read4);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cascadingReadsAtBoundsTest() {
|
||||
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
|
||||
SAMRecord read2 = buildSAMRecord("chr1", 2, 6);
|
||||
SAMRecord read3 = buildSAMRecord("chr1", 3, 7);
|
||||
SAMRecord read4 = buildSAMRecord("chr1", 4, 8);
|
||||
SAMRecord read5 = buildSAMRecord("chr1", 5, 9);
|
||||
SAMRecord read6 = buildSAMRecord("chr1", 6, 10);
|
||||
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5);
|
||||
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 2, 6);
|
||||
GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 3, 7);
|
||||
GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 4, 8);
|
||||
GATKSAMRecord read5 = buildSAMRecord("read5","chr1", 5, 9);
|
||||
GATKSAMRecord read6 = buildSAMRecord("read6","chr1", 6, 10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
|
||||
|
|
@ -229,25 +230,25 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
|
||||
Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cascadingReadsOverlappingBoundsTest() {
|
||||
SAMRecord read01 = buildSAMRecord("chr1", 1, 5);
|
||||
SAMRecord read02 = buildSAMRecord("chr1", 2, 6);
|
||||
SAMRecord read03 = buildSAMRecord("chr1", 3, 7);
|
||||
SAMRecord read04 = buildSAMRecord("chr1", 4, 8);
|
||||
SAMRecord read05 = buildSAMRecord("chr1", 5, 9);
|
||||
SAMRecord read06 = buildSAMRecord("chr1", 6, 10);
|
||||
SAMRecord read07 = buildSAMRecord("chr1", 7, 11);
|
||||
SAMRecord read08 = buildSAMRecord("chr1", 8, 12);
|
||||
SAMRecord read09 = buildSAMRecord("chr1", 9, 13);
|
||||
SAMRecord read10 = buildSAMRecord("chr1", 10, 14);
|
||||
SAMRecord read11 = buildSAMRecord("chr1", 11, 15);
|
||||
SAMRecord read12 = buildSAMRecord("chr1", 12, 16);
|
||||
GATKSAMRecord read01 = buildSAMRecord("read1","chr1", 1, 5);
|
||||
GATKSAMRecord read02 = buildSAMRecord("read2","chr1", 2, 6);
|
||||
GATKSAMRecord read03 = buildSAMRecord("read3","chr1", 3, 7);
|
||||
GATKSAMRecord read04 = buildSAMRecord("read4","chr1", 4, 8);
|
||||
GATKSAMRecord read05 = buildSAMRecord("read5","chr1", 5, 9);
|
||||
GATKSAMRecord read06 = buildSAMRecord("read6","chr1", 6, 10);
|
||||
GATKSAMRecord read07 = buildSAMRecord("read7","chr1", 7, 11);
|
||||
GATKSAMRecord read08 = buildSAMRecord("read8","chr1", 8, 12);
|
||||
GATKSAMRecord read09 = buildSAMRecord("read9","chr1", 9, 13);
|
||||
GATKSAMRecord read10 = buildSAMRecord("read10","chr1", 10, 14);
|
||||
GATKSAMRecord read11 = buildSAMRecord("read11","chr1", 11, 15);
|
||||
GATKSAMRecord read12 = buildSAMRecord("read12","chr1", 12, 16);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read01, read02, read03, read04, read05, read06,
|
||||
read07, read08, read09, read10, read11, read12);
|
||||
|
||||
|
|
@ -257,7 +258,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
|
||||
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
|
||||
Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06,
|
||||
read07, read08, read09, read10, read11, read12);
|
||||
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
|
||||
|
|
@ -277,7 +278,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
* @param bounds
|
||||
* @param reads
|
||||
*/
|
||||
protected abstract void testReadsInContext(LocusView view, List<GenomeLoc> bounds, List<SAMRecord> reads);
|
||||
protected abstract void testReadsInContext(LocusView view, List<GenomeLoc> bounds, List<GATKSAMRecord> reads);
|
||||
|
||||
/**
|
||||
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
|
||||
|
|
@ -321,12 +322,13 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
*
|
||||
* @return New SAM Record
|
||||
*/
|
||||
protected SAMRecord buildSAMRecord(String contig, int alignmentStart, int alignmentEnd) {
|
||||
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary());
|
||||
|
||||
SAMRecord record = new SAMRecord(header);
|
||||
GATKSAMRecord record = new GATKSAMRecord(header);
|
||||
|
||||
record.setReadName(readName);
|
||||
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
|
||||
record.setAlignmentStart(alignmentStart);
|
||||
Cigar cigar = new Cigar();
|
||||
|
|
|
|||
|
|
@ -25,13 +25,10 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import com.google.caliper.Param;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
|
|
@ -41,9 +38,9 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
|
|
@ -126,7 +123,7 @@ class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
|
|||
private long Gs;
|
||||
private long Ts;
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
|
||||
for(byte base: read.getReadBases()) {
|
||||
switch(base) {
|
||||
case 'A': As++; break;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import net.sf.samtools.util.CloseableIterator;
|
|||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
|
|
@ -38,8 +39,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private final LocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -212,12 +212,12 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");
|
||||
|
||||
if(currentLocus == firstLocus) {
|
||||
List<SAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
|
||||
Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
|
||||
}
|
||||
else if(currentLocus == secondLocus) {
|
||||
List<SAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
|
||||
Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
|
||||
Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
|
||||
|
|
@ -265,7 +265,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
Assert.assertTrue(li.hasNext(),"Missing first locus at " + firstLocus);
|
||||
AlignmentContext alignmentContext = li.next();
|
||||
Assert.assertEquals(alignmentContext.getLocation().getStart(),firstLocus,"Incorrect locus at this position; should be " + firstLocus);
|
||||
List<SAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
|
||||
Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + firstLocus);
|
||||
Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + firstLocus);
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ import net.sf.samtools.SAMFileHeader;
|
|||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -94,8 +96,8 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
|||
walker.out = writer;
|
||||
|
||||
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
|
||||
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
|
||||
SAMRecord ret = walker.map(bases, rec,null);
|
||||
GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
|
||||
SAMRecord ret = walker.map(bases, rec, null);
|
||||
assertTrue(ret == rec);
|
||||
assertTrue(ret.getReadName().equals(rec.getReadName()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_QUALITY_TAG, REDUCED_READ_COUNTS);
|
||||
}
|
||||
|
||||
private void testReadBasesAndQuals(SAMRecord read, int expectedStart, int expectedStop) {
|
||||
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {
|
||||
SAMRecord clipped = ReadUtils.hardClipBases(read, expectedStart, expectedStop - 1, null);
|
||||
String expectedBases = BASES.substring(expectedStart, expectedStop);
|
||||
String expectedQuals = QUALS.substring(expectedStart, expectedStop);
|
||||
|
|
|
|||
|
|
@ -26,9 +26,9 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -44,7 +44,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
// TODO: Add error messages on failed tests
|
||||
|
||||
SAMRecord read, expected;
|
||||
GATKSAMRecord read, expected;
|
||||
ReadClipper readClipper;
|
||||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
|
||||
|
|
@ -65,7 +65,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new SAMRecord(read.getHeader()));
|
||||
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new GATKSAMRecord(read.getHeader()));
|
||||
//clip 1 base
|
||||
expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
|
||||
|
|
@ -79,7 +79,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
logger.warn("Executing testHardClipByReadCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new SAMRecord(read.getHeader()));
|
||||
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReadCoordinates(0,0);
|
||||
|
|
@ -112,7 +112,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new SAMRecord(read.getHeader()));
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReferenceCoordinates(-1,1);
|
||||
|
|
@ -145,7 +145,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new SAMRecord(read.getHeader()));
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
|
||||
|
|
@ -166,7 +166,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new SAMRecord(read.getHeader()));
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
|
||||
|
|
@ -188,7 +188,7 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new SAMRecord(read.getHeader()));
|
||||
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipLowQualEnds((byte)34);
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@
|
|||
package org.broadinstitute.sting.utils.fragments;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -52,16 +52,16 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
boolean leftIsFirst, boolean leftIsNegative) {
|
||||
super(FragmentUtilsTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative));
|
||||
|
||||
List<SAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||
SAMRecord left = pair.get(0);
|
||||
SAMRecord right = pair.get(1);
|
||||
List<GATKSAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||
GATKSAMRecord left = pair.get(0);
|
||||
GATKSAMRecord right = pair.get(1);
|
||||
|
||||
for ( int pos = leftStart; pos < rightStart + readLen; pos++) {
|
||||
boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd();
|
||||
boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd();
|
||||
|
||||
if ( posCoveredByLeft || posCoveredByRight ) {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||
List<Integer> offsets = new ArrayList<Integer>();
|
||||
|
||||
if ( posCoveredByLeft ) {
|
||||
|
|
@ -89,9 +89,9 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
private class TestState {
|
||||
int expectedSingletons, expectedPairs;
|
||||
ReadBackedPileup pileup;
|
||||
List<SAMRecord> rawReads;
|
||||
List<GATKSAMRecord> rawReads;
|
||||
|
||||
private TestState(final int expectedSingletons, final int expectedPairs, final ReadBackedPileup pileup, final List<SAMRecord> rawReads) {
|
||||
private TestState(final int expectedSingletons, final int expectedPairs, final ReadBackedPileup pileup, final List<GATKSAMRecord> rawReads) {
|
||||
this.expectedSingletons = expectedSingletons;
|
||||
this.expectedPairs = expectedPairs;
|
||||
this.pileup = pileup;
|
||||
|
|
@ -131,7 +131,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
@Test(enabled = true, dataProvider = "fragmentUtilsTest")
|
||||
public void testAsListOfReadsFromPileup(FragmentUtilsTest test) {
|
||||
for ( TestState testState : test.statesForPileup ) {
|
||||
FragmentCollection<SAMRecord> fp = FragmentUtils.create(testState.pileup.getReads());
|
||||
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.pileup.getReads());
|
||||
Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs);
|
||||
Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons);
|
||||
}
|
||||
|
|
@ -140,7 +140,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
@Test(enabled = true, dataProvider = "fragmentUtilsTest")
|
||||
public void testAsListOfReads(FragmentUtilsTest test) {
|
||||
for ( TestState testState : test.statesForReads ) {
|
||||
FragmentCollection<SAMRecord> fp = FragmentUtils.create(testState.rawReads);
|
||||
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.rawReads);
|
||||
Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs);
|
||||
Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons);
|
||||
}
|
||||
|
|
@ -148,10 +148,10 @@ public class FragmentUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true, expectedExceptions = IllegalArgumentException.class)
|
||||
public void testOutOfOrder() {
|
||||
final List<SAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true);
|
||||
final SAMRecord left = pair.get(0);
|
||||
final SAMRecord right = pair.get(1);
|
||||
final List<SAMRecord> reads = Arrays.asList(right, left); // OUT OF ORDER!
|
||||
final List<GATKSAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true);
|
||||
final GATKSAMRecord left = pair.get(0);
|
||||
final GATKSAMRecord right = pair.get(1);
|
||||
final List<GATKSAMRecord> reads = Arrays.asList(right, left); // OUT OF ORDER!
|
||||
final List<Integer> offsets = Arrays.asList(0, 50);
|
||||
final ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets);
|
||||
FragmentUtils.create(pileup); // should throw exception
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.pileup;
|
|||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
|
|
@ -50,27 +50,25 @@ public class ReadBackedPileupUnitTest {
|
|||
header.addReadGroup(readGroupOne);
|
||||
header.addReadGroup(readGroupTwo);
|
||||
|
||||
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
read1.setAttribute("RG",readGroupOne.getId());
|
||||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
read2.setAttribute("RG",readGroupTwo.getId());
|
||||
SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
|
||||
GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
|
||||
read3.setAttribute("RG",readGroupOne.getId());
|
||||
SAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
|
||||
GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
|
||||
read4.setAttribute("RG",readGroupTwo.getId());
|
||||
SAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,10);
|
||||
GATKSAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,10);
|
||||
read5.setAttribute("RG",readGroupTwo.getId());
|
||||
SAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header,"read6",0,1,10);
|
||||
GATKSAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header,"read6",0,1,10);
|
||||
read6.setAttribute("RG",readGroupOne.getId());
|
||||
SAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header,"read7",0,1,10);
|
||||
GATKSAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header,"read7",0,1,10);
|
||||
read7.setAttribute("RG",readGroupOne.getId());
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,
|
||||
Arrays.asList(read1,read2,read3,read4,read5,read6,read7),
|
||||
Arrays.asList(1,1,1,1,1,1,1));
|
||||
ReadBackedPileup pileup = new ReadBackedPileupImpl(null, Arrays.asList(read1,read2,read3,read4,read5,read6,read7), Arrays.asList(1,1,1,1,1,1,1));
|
||||
|
||||
ReadBackedPileup rg1Pileup = pileup.getPileupForReadGroup("rg1");
|
||||
List<SAMRecord> rg1Reads = rg1Pileup.getReads();
|
||||
List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();
|
||||
Assert.assertEquals(rg1Reads.size(), 4, "Wrong number of reads in read group rg1");
|
||||
Assert.assertEquals(rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
|
||||
Assert.assertEquals(rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");
|
||||
|
|
@ -78,7 +76,7 @@ public class ReadBackedPileupUnitTest {
|
|||
Assert.assertEquals(rg1Reads.get(3), read7, "Read " + read7.getReadName() + " should be in rg1 but isn't");
|
||||
|
||||
ReadBackedPileup rg2Pileup = pileup.getPileupForReadGroup("rg2");
|
||||
List<SAMRecord> rg2Reads = rg2Pileup.getReads();
|
||||
List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();
|
||||
Assert.assertEquals(rg2Reads.size(), 3, "Wrong number of reads in read group rg2");
|
||||
Assert.assertEquals(rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
|
||||
Assert.assertEquals(rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
|
||||
|
|
@ -92,16 +90,16 @@ public class ReadBackedPileupUnitTest {
|
|||
public void testSplitByNullReadGroups() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
|
||||
|
||||
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
|
||||
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,
|
||||
Arrays.asList(read1,read2,read3),
|
||||
Arrays.asList(1,1,1));
|
||||
|
||||
ReadBackedPileup nullRgPileup = pileup.getPileupForReadGroup(null);
|
||||
List<SAMRecord> nullRgReads = nullRgPileup.getReads();
|
||||
List<GATKSAMRecord> nullRgReads = nullRgPileup.getReads();
|
||||
Assert.assertEquals(nullRgPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group");
|
||||
Assert.assertEquals(nullRgReads.get(0), read1, "Read " + read1.getReadName() + " should be in null rg but isn't");
|
||||
Assert.assertEquals(nullRgReads.get(1), read2, "Read " + read2.getReadName() + " should be in null rg but isn't");
|
||||
|
|
@ -125,13 +123,13 @@ public class ReadBackedPileupUnitTest {
|
|||
header.addReadGroup(readGroupOne);
|
||||
header.addReadGroup(readGroupTwo);
|
||||
|
||||
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
read1.setAttribute("RG",readGroupOne.getId());
|
||||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
read2.setAttribute("RG",readGroupTwo.getId());
|
||||
SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
|
||||
GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
|
||||
read3.setAttribute("RG",readGroupOne.getId());
|
||||
SAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
|
||||
GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
|
||||
read4.setAttribute("RG",readGroupTwo.getId());
|
||||
|
||||
ReadBackedPileupImpl sample1Pileup = new ReadBackedPileupImpl(null,
|
||||
|
|
@ -147,14 +145,14 @@ public class ReadBackedPileupUnitTest {
|
|||
ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
|
||||
|
||||
ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1");
|
||||
List<SAMRecord> rg1Reads = rg1Pileup.getReads();
|
||||
List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();
|
||||
|
||||
Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1");
|
||||
Assert.assertEquals(rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
|
||||
Assert.assertEquals(rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");
|
||||
|
||||
ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2");
|
||||
List<SAMRecord> rg2Reads = rg2Pileup.getReads();
|
||||
List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();
|
||||
|
||||
Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg2");
|
||||
Assert.assertEquals(rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
|
||||
|
|
@ -175,9 +173,9 @@ public class ReadBackedPileupUnitTest {
|
|||
header.addReadGroup(readGroupOne);
|
||||
header.addReadGroup(readGroupTwo);
|
||||
|
||||
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
|
||||
read1.setAttribute("RG",readGroupOne.getId());
|
||||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
|
||||
read2.setAttribute("RG",readGroupTwo.getId());
|
||||
|
||||
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue