GATKSAMRecord refactor

The GATK engine will now provide a GATKSAMRecord to all tools, which incorporates the functionality the GATK layers on top of the BAM file (read groups, reduced reads, ...).

* No tools should create SAMRecord anymore, use GATKSAMRecord instead *
This commit is contained in:
Mauricio Carneiro 2011-11-03 15:12:55 -04:00
parent f1df6c0c81
commit e89ff063fc
51 changed files with 1092 additions and 1075 deletions

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -35,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
@ -72,12 +72,13 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
/**
* Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align.
* @return Number of reads aligned by this map (aka 1).
*/
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
//logger.info(String.format("examining read %s", read.getReadName()));
byte[] bases = read.getReadBases();

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -92,12 +93,13 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
/**
* Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align.
* @return Number of alignments found for this read.
*/
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
SAMRecord alignedRead = aligner.align(read,header);
out.addAlignment(alignedRead);
return 1;

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -34,6 +33,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.util.Iterator;
@ -79,12 +79,13 @@ public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
/**
* Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align.
* @return Number of alignments found for this read.
*/
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
if(alignmentIterator.hasNext()) {
int numAlignments = alignmentIterator.next().length;

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
@ -130,7 +130,7 @@ public class AlignmentContext implements HasGenomeLocation {
*/
@Deprecated
//todo: unsafe and tailored for current usage only; both pileups can be null or, worse, both can be non-null in theory
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); }
public List<GATKSAMRecord> getReads() { return ( basePileup.getReads() ); }
/**
* Are there any reads associated with this locus?

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Collections;
import java.util.List;
@ -132,7 +132,7 @@ public class AllLocusView extends LocusView {
* @param site Site at which to create the blank locus context.
* @return empty context.
*/
private final static List<SAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
private final static List<GATKSAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList();
private AlignmentContext createEmptyLocus( GenomeLoc site ) {
return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));

View File

@ -45,6 +45,7 @@ import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
@ -377,10 +378,7 @@ public class LocusIteratorByState extends LocusIterator {
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
}
else nInsertions++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadEventStartOffset(),
state.getEventLength(),
state.getEventBases()) );
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) );
} else {
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
@ -402,9 +400,7 @@ public class LocusIteratorByState extends LocusIterator {
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
// about to report) only if includeReadsWithDeletionAtLoci is true.
size++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadOffset()-1,
-1) // length=-1 --> noevent
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent
);
}
}
@ -442,12 +438,12 @@ public class LocusIteratorByState extends LocusIterator {
continue;
} else {
//observed_bases++;
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()));
size++;
}
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
size++;
pile.add(new PileupElement(state.getRead(), -1));
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1));
nDeletions++;
}

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
@ -57,9 +58,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
return "dups";
}
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
private List<GATKSAMRecord> readsAtLoc(final GATKSAMRecord read, PushbackIterator<SAMRecord> iter) {
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
ArrayList<GATKSAMRecord> l = new ArrayList<GATKSAMRecord>();
l.add(read);
for (SAMRecord read2 : iter) {
@ -70,7 +71,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
iter.pushback(read2);
break;
} else {
l.add(read2);
l.add((GATKSAMRecord) read2);
}
}
@ -84,15 +85,15 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param reads the list of reads to split into unique molecular samples
* @return
*/
protected Set<List<SAMRecord>> uniqueReadSets(List<SAMRecord> reads) {
Set<List<SAMRecord>> readSets = new LinkedHashSet<List<SAMRecord>>();
protected Set<List<GATKSAMRecord>> uniqueReadSets(List<GATKSAMRecord> reads) {
Set<List<GATKSAMRecord>> readSets = new LinkedHashSet<List<GATKSAMRecord>>();
// for each read, find duplicates, and either add the read to its duplicate list or start a new one
for ( SAMRecord read : reads ) {
List<SAMRecord> readSet = findDuplicateReads(read, readSets);
for ( GATKSAMRecord read : reads ) {
List<GATKSAMRecord> readSet = findDuplicateReads(read, readSets);
if ( readSet == null ) {
readSets.add(new ArrayList<SAMRecord>(Arrays.asList(read))); // copy so I can add to the list
readSets.add(new ArrayList<GATKSAMRecord>(Arrays.asList(read))); // copy so I can add to the list
} else {
readSet.add(read);
}
@ -110,13 +111,13 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param readSets
* @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind
*/
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) {
protected List<GATKSAMRecord> findDuplicateReads(GATKSAMRecord read, Set<List<GATKSAMRecord>> readSets ) {
if ( read.getReadPairedFlag() ) {
// paired
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
for (List<SAMRecord> reads : readSets) {
SAMRecord key = reads.get(0);
for (List<GATKSAMRecord> reads : readSets) {
GATKSAMRecord key = reads.get(0);
// read and key start at the same place, and either the this read and the key
// share a mate location or the read is flagged as a duplicate
@ -131,8 +132,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
}
}
} else {
for (List<SAMRecord> reads : readSets) {
SAMRecord key = reads.get(0);
for (List<GATKSAMRecord> reads : readSets) {
GATKSAMRecord key = reads.get(0);
boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength();
//System.out.printf("%s %s %b %b %d %d %d %d => %b%n",
// read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(),
@ -179,7 +180,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
// get the genome loc from the read
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter));
Set<List<GATKSAMRecord>> readSets = uniqueReadSets(readsAtLoc((GATKSAMRecord) read, iter));
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
// Jump forward in the reference to this locus location

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
@ -100,9 +101,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
// if the read is mapped, create a metadata tracker
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, read);
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
if (keepMeP) {
M x = walker.map(refContext, read, tracker); // the tracker can be null
M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null
sum = walker.reduce(x, sum);
}

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingOp;
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
@ -292,11 +292,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
/**
* The reads map function.
*
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @param read the read itself, as a GATKSAMRecord
* @return the ReadClipper object describing what should be done to clip this read
*/
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
read = ReadUtils.replaceSoftClipsWithMatches(read);
@ -323,7 +324,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/
private void clipSequences(ReadClipperWithData clipper) {
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
SAMRecord read = clipper.getRead();
GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (SeqToClip stc : sequencesToClip) {
@ -360,7 +361,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param stop
* @return
*/
private Pair<Integer, Integer> strandAwarePositions(SAMRecord read, int start, int stop) {
private Pair<Integer, Integer> strandAwarePositions(GATKSAMRecord read, int start, int stop) {
if (read.getReadNegativeStrandFlag())
return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1);
else
@ -374,7 +375,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/
private void clipCycles(ReadClipperWithData clipper) {
if (cyclesToClip != null) {
SAMRecord read = clipper.getRead();
GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
@ -416,7 +417,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param clipper
*/
private void clipBadQualityScores(ReadClipperWithData clipper) {
SAMRecord read = clipper.getRead();
GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
int readLen = read.getReadBases().length;
byte[] quals = read.getBaseQualities();
@ -458,7 +459,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
if ( clipper == null )
return data;
SAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
GATKSAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
if (outputBam != null) {
outputBam.addAlignment(clippedRead);
} else {
@ -575,7 +576,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
public class ReadClipperWithData extends ReadClipper {
private ClippingData data;
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
super(read);
data = new ClippingData(clipSeqs);
}

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
import java.util.Set;
@ -20,11 +20,11 @@ import java.util.Set;
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets ) {
return true; // We are keeping all the reads
}
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets );
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets );
// Given result of map function
public abstract ReduceType reduceInit();

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
@ -71,21 +72,23 @@ public class FindReadsWithNamesWalker extends ReadWalker<SAMRecord, SAMFileWrite
/**
* The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
return namesToKeep.contains(read.getReadName());
}
/**
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
}

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.text.DecimalFormat;
@ -119,7 +119,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
private FlagStat myStat = new FlagStat();
public Integer map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
myStat.readCount++;
if (read.getReadFailsVendorQualityCheckFlag()) {
myStat.QC_failure++;

View File

@ -40,6 +40,7 @@ import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
@ -136,11 +137,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/**
* The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// check the read group
if ( readGroup != null ) {
SAMReadGroupRecord myReadGroup = read.getReadGroup();
@ -180,11 +182,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/**
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Created by IntelliJ IDEA.
@ -20,11 +21,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
/** Must return true for reads that need to be processed. Reads, for which this method return false will
* be skipped by the engine and never passed to the walker.
*/
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// We are keeping all the reads
return true;
}
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker);
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
}

View File

@ -33,6 +33,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
@ -60,7 +61,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
logger.info("SplitSamFile version: " + VERSION);
}
public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
return read;
}

View File

@ -1,13 +1,13 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.util.List;
@ -69,12 +69,12 @@ public class ReadLengthDistribution extends ReadWalker<Integer, Integer> {
}
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
return ( !read.getReadPairedFlag() || read.getReadPairedFlag() && read.getFirstOfPairFlag());
}
@Override
public Integer map(ReferenceContext referenceContext, SAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) {
public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) {
GATKReportTable table = report.getTable("ReadLengthDistribution");
int length = Math.abs(samRecord.getReadLength());

View File

@ -9,6 +9,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
@ -180,8 +181,8 @@ public class ConstrainedMateFixingManager {
addRead(newRead, readWasModified, true);
}
public void addReads(List<SAMRecord> newReads, Set<SAMRecord> modifiedReads) {
for ( SAMRecord newRead : newReads )
public void addReads(List<GATKSAMRecord> newReads, Set<GATKSAMRecord> modifiedReads) {
for ( GATKSAMRecord newRead : newReads )
addRead(newRead, modifiedReads.contains(newRead), false);
}

View File

@ -47,6 +47,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
@ -281,10 +282,10 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// the reads and known indels that fall into the current interval
private final ReadBin readsToClean = new ReadBin();
private final ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>();
private final ArrayList<GATKSAMRecord> readsNotToClean = new ArrayList<GATKSAMRecord>();
private final ArrayList<VariantContext> knownIndelsToTry = new ArrayList<VariantContext>();
private final HashSet<Object> indelRodsSeen = new HashSet<Object>();
private final HashSet<SAMRecord> readsActuallyCleaned = new HashSet<SAMRecord>();
private final HashSet<GATKSAMRecord> readsActuallyCleaned = new HashSet<GATKSAMRecord>();
private static final int MAX_QUAL = 99;
@ -469,7 +470,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
readsActuallyCleaned.clear();
}
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( currentInterval == null ) {
emit(read);
return 0;
@ -535,7 +536,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// TODO -- it would be nice if we could use indels from 454 reads as alternate consenses
}
private void cleanAndCallMap(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
private void cleanAndCallMap(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) {
if ( readsToClean.size() > 0 ) {
GenomeLoc earliestPossibleMove = getToolkit().getGenomeLocParser().createGenomeLoc(readsToClean.getReads().get(0));
if ( manager.canMoveReads(earliestPossibleMove) )
@ -656,14 +657,14 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void clean(ReadBin readsToClean) {
final List<SAMRecord> reads = readsToClean.getReads();
final List<GATKSAMRecord> reads = readsToClean.getReads();
if ( reads.size() == 0 )
return;
byte[] reference = readsToClean.getReference(referenceReader);
int leftmostIndex = readsToClean.getLocation().getStart();
final ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>(); // reads that perfectly match ref
final ArrayList<GATKSAMRecord> refReads = new ArrayList<GATKSAMRecord>(); // reads that perfectly match ref
final ArrayList<AlignedRead> altReads = new ArrayList<AlignedRead>(); // reads that don't perfectly match
final LinkedList<AlignedRead> altAlignmentsToTest = new LinkedList<AlignedRead>(); // should we try to make an alt consensus from the read?
final Set<Consensus> altConsenses = new LinkedHashSet<Consensus>(); // list of alt consenses
@ -815,7 +816,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// however we don't have enough info to use the proper MAQ scoring system.
// For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010].
// TODO -- we need a better solution here
SAMRecord read = aRead.getRead();
GATKSAMRecord read = aRead.getRead();
read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254));
// before we fix the attribute tags we first need to make sure we have enough of the reference sequence
@ -874,8 +875,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
}
private long determineReadsThatNeedCleaning(final List<SAMRecord> reads,
final ArrayList<SAMRecord> refReadsToPopulate,
private long determineReadsThatNeedCleaning(final List<GATKSAMRecord> reads,
final ArrayList<GATKSAMRecord> refReadsToPopulate,
final ArrayList<AlignedRead> altReadsToPopulate,
final LinkedList<AlignedRead> altAlignmentsToTest,
final Set<Consensus> altConsenses,
@ -884,7 +885,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
long totalRawMismatchSum = 0L;
for ( final SAMRecord read : reads ) {
for ( final GATKSAMRecord read : reads ) {
// we can not deal with screwy records
if ( read.getCigar().numCigarElements() == 0 ) {
@ -1372,7 +1373,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
private class AlignedRead {
private final SAMRecord read;
private final GATKSAMRecord read;
private byte[] readBases = null;
private byte[] baseQuals = null;
private Cigar newCigar = null;
@ -1380,12 +1381,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private int mismatchScoreToReference = 0;
private long alignerMismatchScore = 0;
public AlignedRead(SAMRecord read) {
public AlignedRead(GATKSAMRecord read) {
this.read = read;
mismatchScoreToReference = 0;
}
public SAMRecord getRead() {
public GATKSAMRecord getRead() {
return read;
}
@ -1569,7 +1570,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private class ReadBin implements HasGenomeLocation {
private final ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
private byte[] reference = null;
private GenomeLoc loc = null;
@ -1577,7 +1578,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// Return false if we can't process this read bin because the reads are not correctly overlapping.
// This can happen if e.g. there's a large known indel with no overlapping reads.
public void add(SAMRecord read) {
public void add(GATKSAMRecord read) {
GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read);
if ( loc == null )
@ -1588,7 +1589,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
reads.add(read);
}
public List<SAMRecord> getReads() { return reads; }
public List<GATKSAMRecord> getReads() { return reads; }
public byte[] getReference(IndexedFastaSequenceFile referenceReader) {
// set up the reference if we haven't done so yet

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
@ -88,7 +89,7 @@ public class LeftAlignIndels extends ReadWalker<Integer, Integer> {
writer.addAlignment(read);
}
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
// we can not deal with screwy records
if ( read.getCigar().numCigarElements() == 0 ) {
emit(read);

View File

@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -394,7 +395,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
// if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT READ");

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
@ -32,6 +31,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
*/
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
Sample sample = getSampleDB().getSample(read);
return sample.getGender() == Gender.MALE ? 1 : 0;
}

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
@ -38,7 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
*/
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReadsWalker extends ReadWalker<Integer, Integer> {
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
return 1;
}

View File

@ -1,434 +1,434 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.collections.PrimitivePair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import java.io.*;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Apr 9, 2010
* Time: 12:16:41 PM
* To change this template use File | Settings | File Templates.
*/
/**
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
* Can also count the number of reads matching a given criterion using read filters (see the
* --read-filter command line argument). Simplest example of a read-backed analysis.
*/
@Requires({DataSource.READS})
public class CycleQualityWalker extends ReadWalker<Integer,Integer> {
    @Output
    protected PrintStream out;

    @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
            "on mapped reads only, while unmapped reads will be discarded", required=false)
    protected boolean MAPPED_ONLY = true;

    @Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false)
    protected int MAX_READ_LENGTH = 500;

    @Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true)
    protected String PREFIX = null;

    // @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false)
    protected boolean HTML = false;

    @Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false)
    protected double QTHRESHOLD = 10.0;

    @Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+
            "original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.")
    protected boolean ASSESS_BOTH_QUALS = false;

    // Per-cycle quality accumulators keyed by platform unit (lane) and by library.
    // Array index 0/1 selects the first/second end of a read pair; single-end lanes get a length-1 array.
    private Map<String,CycleStats[]> cyclesByLaneMap = null;
    private Map<String,CycleStats[]> cyclesByLibraryMap = null;
    // NOTE(review): the "Orig" maps are allocated but never populated or read anywhere in this class;
    // they appear to be placeholders for the (commented-out) OQ-based statistics path.
    private Map<String,CycleStats[]> cyclesByLaneMapOrig = null;
    private Map<String,CycleStats[]> cyclesByLibraryMapOrig = null;

    /** Validates arguments and allocates the per-lane / per-library accumulator maps. */
    public void initialize() {
        if ( PREFIX == null ) throw new ReviewedStingException("Prefix for output file(s) must be specified");
        cyclesByLaneMap = new HashMap<String,CycleStats[]>();
        cyclesByLibraryMap = new HashMap<String,CycleStats[]>();
        cyclesByLaneMapOrig = new HashMap<String,CycleStats[]>();
        cyclesByLibraryMapOrig = new HashMap<String,CycleStats[]>();
    }

    /**
     * Folds one read's base qualities (in machine-cycle order) into the statistics for its
     * lane (platform unit) and library, keeping each end of a pair separate.
     *
     * @param ref             reference context over the read (unused)
     * @param read            the read to analyze; must carry a read group with platform unit and library
     * @param metaDataTracker reference metadata (unused)
     * @return 1 if the read was counted, 0 if it was skipped as unmapped
     * @throws UserException.ReadMissingReadGroup if the read has no read group
     * @throws UserException.MalformedBAM if platform unit / library is missing or pair flags conflict
     */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
        // FIX: honor the mappedOnly flag; previously unmapped reads were discarded unconditionally,
        // making the documented "-mo false" behavior unreachable.
        if ( MAPPED_ONLY && AlignmentUtils.isReadUnmapped(read) ) return 0;

        SAMReadGroupRecord rg = read.getReadGroup();
        if ( rg == null ) throw new UserException.ReadMissingReadGroup(read);

        String lane = read.getReadGroup().getPlatformUnit();
        String library = read.getReadGroup().getLibrary();

        if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information");
        if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information");

        // end: 0 = unpaired, 1 = first of pair, 2 = second of pair (validated against the BAM flags)
        int end = 0;
        if ( read.getReadPairedFlag() ) {
            if ( read.getFirstOfPairFlag() ) {
                if ( read.getSecondOfPairFlag() )
                    throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
                end = 1;
            } else {
                if ( ! read.getSecondOfPairFlag() )
                    throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
                end = 2;
            }
        }

        CycleStats[] byLane = cyclesByLaneMap.get(lane);
        CycleStats[] byLib = cyclesByLibraryMap.get(library);

        //byte [] quals = USE_ORIGINAL_QUALS ? AlignmentUtils.getOriginalQualsInCycleOrder(read) : AlignmentUtils.getQualsInCycleOrder(read);
        byte [] quals = AlignmentUtils.getQualsInCycleOrder(read);

        // if end == 0 (single end lane), we allocate array of length 1, otherwise we need two
        // elements in the array in order to be able to collect statistics for each end in the pair independently
        if ( byLane == null ) cyclesByLaneMap.put(lane,byLane = new CycleStats[(end==0?1:2)]);
        if ( byLib == null ) cyclesByLibraryMap.put(library, byLib =new CycleStats[2]);

        if ( end != 0 ) end--; // we will now use 'end' as index into the array of stats

        if ( byLane[end] == null ) byLane[end] = new CycleStats(MAX_READ_LENGTH);
        if ( byLib[end] == null ) byLib[end] =new CycleStats(MAX_READ_LENGTH);

        byLane[end].add(quals);
        byLib[end].add(quals);

        return 1;
    }

    /**
     * Provide an initial value for reduce computations.
     *
     * @return Initial value of reduce.
     */
    public Integer reduceInit() {
        return 0;
    }

    /**
     * Reduces a single map with the accumulator provided as the ReduceType.
     *
     * @param value result of the map.
     * @param sum   accumulator for the reduce.
     * @return accumulator with result of the map taken into account.
     */
    public Integer reduce(Integer value, Integer sum) {
        return sum.intValue()+value.intValue();
    }

    /**
     * Writes the summary report to {@code out} and the per-cycle statistics tables to
     * PREFIX.byLane.txt and PREFIX.byLibrary.txt.
     *
     * @param result total number of reads analyzed (the final reduce value)
     */
    public void onTraversalDone(Integer result) {
        if ( HTML ) {
            out.println("<h3>Cycle Quality QC</h3>\n");
            out.println("File(s) analyzed: <br>");
            for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"<br>");
            out.println("<br>");
        }
        if ( HTML ) out.println("<br><br>");
        out.println("\n"+result+" reads analyzed\n");
        if ( HTML ) out.println("<br><br>");
        out.println("by platform unit:");
        if ( HTML ) out.println("<br>");
        report2(cyclesByLaneMap, new File(PREFIX+".byLane.txt"),true);
        out.println();
        if ( HTML ) out.println("<br><br>");
        out.println("\nby library:");
        if ( HTML ) out.println("<br>");
        report2(cyclesByLibraryMap, new File(PREFIX+".byLibrary.txt"),true);
        out.println();
        if ( HTML ) out.println("<br><br>");
    }

    /**
     * Emits one statistics table: a tab-separated file {@code f} with one row per cycle and
     * mean/stddev columns per group (and per end for paired data), plus a human-readable
     * summary and a list of cycle intervals whose average quality falls below QTHRESHOLD.
     *
     * @param m             accumulated stats keyed by group name (lane or library)
     * @param f             destination file for the per-cycle table
     * @param summaryReport if true, also print the per-group read-count summary to {@code out}
     * @throws UserException.CouldNotCreateOutputFile on any I/O failure while writing {@code f}
     */
    private void report2(Map<String,CycleStats[]> m, File f,boolean summaryReport) {
        long totalReads_1 =0;
        long totalReads_2 =0;
        long totalReads_unpaired = 0;
        SortedSet<String> columns = new TreeSet<String>();

        int maxLength = 0; // maximum read length across all lanes/read ends analyzed

        // first pass: tally read counts, find the longest read, and collect the sorted column names
        for( Map.Entry<String,CycleStats[]> e : m.entrySet() ) {
            // NOTE(review): assumes e.getValue()[0] is non-null, i.e. every group saw at least
            // one end-1/unpaired read — TODO confirm this holds for end-2-only groups
            if ( e.getValue()[0].getMaxReadLength() > maxLength ) maxLength = e.getValue()[0].getMaxReadLength();
            if ( e.getValue().length == 1 || e.getValue().length == 2 && e.getValue()[1] == null ) {
                totalReads_unpaired += e.getValue()[0].getReadCount(); // single end lane
            } else {
                totalReads_1 += e.getValue()[0].getReadCount();
                totalReads_2 += e.getValue()[1].getReadCount();
                if ( e.getValue()[1].getMaxReadLength() > maxLength ) maxLength = e.getValue()[1].getMaxReadLength();
            }
            columns.add(e.getKey());
        }

        if ( summaryReport ) {
            if ( totalReads_1 == 0 && totalReads_2 != 0) {
                out.println("   End 1: No reads");
                if ( HTML ) out.println("<br>");
            }
            if ( totalReads_2 == 0 && totalReads_1 != 0 ) {
                out.println("   End 2: No reads");
                if ( HTML ) out.println("<br>");
            }
            if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) {
                out.println("   No reads found.");
                if ( HTML ) out.println("<br>");
            }
        }
        if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) return;

        try {
            BufferedWriter w = new BufferedWriter(new FileWriter(f));
            w.write("cycle");

            // header row of the table + per-group summary on out
            for( String col : columns ) {
                CycleStats[] data = m.get(col);
                if ( summaryReport ) {
                    out.print("  ");
                    out.print(col);
                }
                CycleStats end1 = data[0];
                int minL = ( end1 == null ? 0 : end1.getMinReadLength() );
                int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() );
                if ( data.length == 2 && data[1] != null ) {
                    if ( summaryReport ) {
                        out.println(": paired");
                        if ( HTML ) out.println("<br>");
                        out.println("   Reads analyzed:");
                        if ( HTML ) out.println("<br>");
                    }
                    CycleStats end2 = data[1];
                    out.print( "      End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) );
                    if ( minL == maxL ) out.println("; read length = "+minL);
                    else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
                    if ( HTML ) out.println("<br>");
                    out.print( "      End 2: "+ ( end2 == null ? 0 : end2.getReadCount()) );
                    minL = ( end2 == null ? 0 : end2.getMinReadLength() );
                    maxL = ( end2 == null ? 0 : end2.getMaxReadLength() );
                    if ( minL == maxL ) out.println("; read length = "+minL);
                    else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
                    if ( HTML ) out.println("<br>");
                }
                else {
                    out.println(": unpaired");
                    if ( HTML ) out.println("<br>");
                    out.print( "   Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) );
                    if ( minL == maxL ) out.println("; read length = "+minL);
                    else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
                    if ( HTML ) out.println("<br>");
                }
                w.write('\t') ;
                w.write(col);
                if ( data.length == 1 || data.length == 2 && data[1] == null ) {
                    w.write(".unpaired");
                    w.write('\t');
                    w.write(col);
                    w.write(".unpaired.stddev");
                } else {
                    w.write(".end1");
                    w.write('\t');
                    w.write(col);
                    w.write(".end1.stddev");
                    w.write('\t') ;
                    w.write(col);
                    w.write(".end2");
                    w.write('\t');
                    w.write(col);
                    w.write(".end2.stddev");
                }
            }
            w.write('\n');

            // data rows: one line per cycle; low-quality cycles are recorded as problem intervals
            int cycle = 0;
            Map<String,List<PrimitivePair.Int>> problems = new HashMap<String,List<PrimitivePair.Int>>();
            while ( cycle < maxLength ) {
                w.write(Integer.toString(cycle+1));
                for ( String col : columns ) {
                    CycleStats[] data = m.get(col);
                    CycleStats end1 = data[0];
                    w.write('\t');
                    if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t.");
                    else {
                        double aq = end1.getCycleQualAverage(cycle);
                        w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle)));
                        recordProblem(aq,cycle, problems,col+".End1");
                    }
                    if ( data.length > 1 && data[1] != null ) {
                        w.write('\t');
                        CycleStats end2 = data[1];
                        if ( end2 == null || cycle >= end2.getMaxReadLength() ) w.write(".\t.");
                        else {
                            double aq = end2.getCycleQualAverage(cycle);
                            w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle)));
                            recordProblem(aq,cycle, problems,col+".End2");
                        }
                    }
                }
                w.write('\n');
                cycle++;
            }
            w.close();

            // outcome section: print bad-quality cycle intervals (1-based, inclusive) per group/end
            if ( HTML ) out.println("<hr>");
            if ( HTML ) out.println("<br>");
            out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):");
            if ( HTML ) out.println("<br>");
            for ( String col : columns ) {
                List<PrimitivePair.Int> lp = problems.get(col+".End1");
                out.print("   "+col+" End1:");
                if ( lp == null ) {
                    out.print(" GOOD");
                } else {
                    for ( PrimitivePair.Int p : lp ) {
                        out.print(" "+(p.first+1)+"-");
                        if ( p.second >= 0 ) out.print((p.second+1));
                        else out.print("END");
                    }
                }
                out.println();
                if ( HTML ) out.println("<br>");
                lp = problems.get(col+".End2");
                out.print("   "+col+" End2:");
                if ( lp == null ) {
                    out.print(" GOOD");
                } else {
                    for ( PrimitivePair.Int p : lp ) {
                        out.print(" "+(p.first+1)+"-");
                        // FIX: print the upper bound 1-based like End1 does; previously End2
                        // printed p.second (0-based), off by one relative to the End1 output
                        if ( p.second >= 0 ) out.print((p.second+1));
                        else out.print("END");
                    }
                }
                out.println();
                if ( HTML ) out.println("<br>");
            }
        } catch (IOException ioe) {
            throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe);
        }
    }

    /**
     * Maintains, per group {@code name}, a list of half-open "bad quality" cycle runs.
     * A run is opened (second == -1) when quality drops below QTHRESHOLD and closed
     * (second = last bad cycle) when a good cycle is seen again.
     *
     * @param q        average quality at this cycle
     * @param cycle    0-based cycle index
     * @param problems accumulator map of group name -> list of [start,end] intervals
     * @param name     group key, e.g. "&lt;lane&gt;.End1"
     */
    private void recordProblem(double q, int cycle, Map<String,List<PrimitivePair.Int>> problems, String name) {
        PrimitivePair.Int p = null;
        List<PrimitivePair.Int> lp = null;
        if ( q < QTHRESHOLD ) { // there is a problem
            if ( ! problems.containsKey(name) ) {
                lp = new ArrayList<PrimitivePair.Int>();
                p = new PrimitivePair.Int(cycle,-1);
                lp.add(p);
                problems.put(name,lp);
            } else {
                lp = problems.get(name);
                p = lp.get(lp.size()-1);
            }
            if ( p.second != -1 ) { // if we are not already inside a run of bad qual bases
                lp.add(new PrimitivePair.Int(cycle,-1)); // start new run
            }
        } else { // good base
            if ( problems.containsKey(name) ) { // only if we had problem intervals at all, we need to check if the last one needs to be closed
                lp = problems.get(name);
                p = lp.get(lp.size()-1);
                if ( p.second == -1 ) p.second = cycle - 1;
            }
        }
    }

    /**
     * Online per-cycle mean/variance accumulator (Welford's algorithm): one mean and one
     * sum-of-squared-deviations slot per cycle, updated incrementally as reads arrive.
     */
    static class CycleStats {
        private long readCount = 0;
        private double[] cycleQualsAv = null; // running mean quality per cycle
        private double[] cycleQualsSd = null; // running sum of squared deviations per cycle
        private int minL = 1000000000; // read min. length
        private int maxL = 0; // read max. length

        /** @param N maximum read length; fixes the number of cycle slots */
        public CycleStats(int N) {
            readCount = 0;
            cycleQualsAv = new double[N];
            cycleQualsSd = new double[N];
        }

        /**
         * Folds one read's qualities (cycle order) into the running statistics.
         *
         * @param quals base qualities, one per cycle
         * @throws UserException if the read is longer than the configured maximum
         */
        public void add(byte[] quals) {
            if ( quals.length > cycleQualsAv.length )
                throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length");
            if ( quals.length > maxL ) maxL = quals.length;
            if ( quals.length < minL ) minL = quals.length;
            readCount++;
            for ( int i = 0 ; i < quals.length ; i++ ) {
                // NOTE: in the update equaltions below, there is no need to check if readCount == 1 (i.e.
                // we are initializing with the very first record) or not. Indeed, the arrays are initialized with
                // 0; when the very first value arrives, readCount is 1 and cycleQuals[i] gets set to quals[i] (correct!);
                // this will also make the second term in the update equation for Sd (quals[i]-cycleQualsAv[i]) equal
                // to 0, so Sd will be initially set to 0.
                double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance
                cycleQualsAv[i] += ( quals[i] - cycleQualsAv[i] ) / readCount; // update mean
                cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] );
            }
        }

        public long getReadCount() { return readCount; }
        public int getMaxReadLength() { return maxL; }
        public int getMinReadLength() { return minL; }
        double getCycleQualAverage(int i) { return cycleQualsAv[i]; }
        /** Sample standard deviation at cycle i. FIX: returns 0.0 for a single read instead of NaN (0/0). */
        double getCycleQualStdDev(int i) { return readCount > 1 ? Math.sqrt( cycleQualsSd[i]/(readCount-1) ) : 0.0; }
    }
}
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.collections.PrimitivePair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.*;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Apr 9, 2010
* Time: 12:16:41 PM
* To change this template use File | Settings | File Templates.
*/
/**
* Walks over the input data set, calculating the number of reads seen for diagnostic purposes.
* Can also count the number of reads matching a given criterion using read filters (see the
* --read-filter command line argument). Simplest example of a read-backed analysis.
*/
@Requires({DataSource.READS})
public class CycleQualityWalker extends ReadWalker<Integer,Integer> {
@Output
protected PrintStream out;
@Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
"on mapped reads only, while unmapped reads will be discarded", required=false)
protected boolean MAPPED_ONLY = true;
@Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false)
protected int MAX_READ_LENGTH = 500;
@Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true)
protected String PREFIX = null;
// @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false)
protected boolean HTML = false;
@Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false)
protected double QTHRESHOLD = 10.0;
@Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+
"original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.")
protected boolean ASSESS_BOTH_QUALS = false;
private Map<String,CycleStats[]> cyclesByLaneMap = null;
private Map<String,CycleStats[]> cyclesByLibraryMap = null;
private Map<String,CycleStats[]> cyclesByLaneMapOrig = null;
private Map<String,CycleStats[]> cyclesByLibraryMapOrig = null;
public void initialize() {
if ( PREFIX == null ) throw new ReviewedStingException("Prefix for output file(s) must be specified");
cyclesByLaneMap = new HashMap<String,CycleStats[]>();
cyclesByLibraryMap = new HashMap<String,CycleStats[]>();
cyclesByLaneMapOrig = new HashMap<String,CycleStats[]>();
cyclesByLibraryMapOrig = new HashMap<String,CycleStats[]>();
}
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( AlignmentUtils.isReadUnmapped(read) ) return 0;
SAMReadGroupRecord rg = read.getReadGroup();
if ( rg == null ) throw new UserException.ReadMissingReadGroup(read);
String lane = read.getReadGroup().getPlatformUnit();
String library = read.getReadGroup().getLibrary();
if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information");
if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information");
int end = 0;
if ( read.getReadPairedFlag() ) {
if ( read.getFirstOfPairFlag() ) {
if ( read.getSecondOfPairFlag() )
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
end = 1;
} else {
if ( ! read.getSecondOfPairFlag() )
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
end = 2;
}
}
CycleStats[] byLane = cyclesByLaneMap.get(lane);
CycleStats[] byLib = cyclesByLibraryMap.get(library);
//byte [] quals = USE_ORIGINAL_QUALS ? AlignmentUtils.getOriginalQualsInCycleOrder(read) : AlignmentUtils.getQualsInCycleOrder(read);
byte [] quals = AlignmentUtils.getQualsInCycleOrder(read);
// if end == 0 (single end lane), we allocate array of length 1, otherwise we need two
// elements in the array in order to be able to collect statistics for each end in the pair independently
if ( byLane == null ) cyclesByLaneMap.put(lane,byLane = new CycleStats[(end==0?1:2)]);
if ( byLib == null ) cyclesByLibraryMap.put(library, byLib =new CycleStats[2]);
if ( end != 0 ) end--; // we will now use 'end' as index into the array of stats
if ( byLane[end] == null ) byLane[end] = new CycleStats(MAX_READ_LENGTH);
if ( byLib[end] == null ) byLib[end] =new CycleStats(MAX_READ_LENGTH);
byLane[end].add(quals);
byLib[end].add(quals);
return 1; //To change body of implemented methods use File | Settings | File Templates.
}
/**
* Provide an initial value for reduce computations.
*
* @return Initial value of reduce.
*/
public Integer reduceInit() {
return 0; //To change body of implemented methods use File | Settings | File Templates.
}
/**
* Reduces a single map with the accumulator provided as the ReduceType.
*
* @param value result of the map.
* @param sum accumulator for the reduce.
* @return accumulator with result of the map taken into account.
*/
public Integer reduce(Integer value, Integer sum) {
return sum.intValue()+value.intValue(); //To change body of implemented methods use File | Settings | File Templates.
}
public void onTraversalDone(Integer result) {
if ( HTML ) {
out.println("<h3>Cycle Quality QC</h3>\n");
out.println("File(s) analyzed: <br>");
for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"<br>");
out.println("<br>");
}
if ( HTML ) out.println("<br><br>");
out.println("\n"+result+" reads analyzed\n");
if ( HTML ) out.println("<br><br>");
out.println("by platform unit:");
if ( HTML ) out.println("<br>");
report2(cyclesByLaneMap, new File(PREFIX+".byLane.txt"),true);
out.println();
if ( HTML ) out.println("<br><br>");
out.println("\nby library:");
if ( HTML ) out.println("<br>");
report2(cyclesByLibraryMap, new File(PREFIX+".byLibrary.txt"),true);
out.println();
if ( HTML ) out.println("<br><br>");
}
private void report2(Map<String,CycleStats[]> m, File f,boolean summaryReport) {
long totalReads_1 =0;
long totalReads_2 =0;
long totalReads_unpaired = 0;
SortedSet<String> columns = new TreeSet<String>();
int maxLength = 0; // maximum read length across all lanes/read ends analyzed
for( Map.Entry<String,CycleStats[]> e : m.entrySet() ) {
if ( e.getValue()[0].getMaxReadLength() > maxLength ) maxLength = e.getValue()[0].getMaxReadLength();
if ( e.getValue().length == 1 || e.getValue().length == 2 && e.getValue()[1] == null ) {
totalReads_unpaired += e.getValue()[0].getReadCount(); // single end lane
} else {
totalReads_1 += e.getValue()[0].getReadCount();
totalReads_2 += e.getValue()[1].getReadCount();
if ( e.getValue()[1].getMaxReadLength() > maxLength ) maxLength = e.getValue()[1].getMaxReadLength();
}
columns.add(e.getKey());
}
if ( summaryReport ) {
if ( totalReads_1 == 0 && totalReads_2 != 0) {
out.println(" End 1: No reads");
if ( HTML ) out.println("<br>");
}
if ( totalReads_2 == 0 && totalReads_1 != 0 ) {
out.println(" End 2: No reads");
if ( HTML ) out.println("<br>");
}
if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) {
out.println(" No reads found.");
if ( HTML ) out.println("<br>");
}
}
if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) return;
try {
BufferedWriter w = new BufferedWriter(new FileWriter(f));
w.write("cycle");
for( String col : columns ) {
CycleStats[] data = m.get(col);
if ( summaryReport ) {
out.print(" ");
out.print(col);
}
CycleStats end1 = data[0];
int minL = ( end1 == null ? 0 : end1.getMinReadLength() );
int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() );
if ( data.length == 2 && data[1] != null ) {
if ( summaryReport ) {
out.println(": paired");
if ( HTML ) out.println("<br>");
out.println(" Reads analyzed:");
if ( HTML ) out.println("<br>");
}
CycleStats end2 = data[1];
out.print( " End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) );
if ( minL == maxL ) out.println("; read length = "+minL);
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
if ( HTML ) out.println("<br>");
out.print( " End 2: "+ ( end2 == null ? 0 : end2.getReadCount()) );
minL = ( end2 == null ? 0 : end2.getMinReadLength() );
maxL = ( end2 == null ? 0 : end2.getMaxReadLength() );
if ( minL == maxL ) out.println("; read length = "+minL);
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
if ( HTML ) out.println("<br>");
}
else {
out.println(": unpaired");
if ( HTML ) out.println("<br>");
out.print( " Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) );
if ( minL == maxL ) out.println("; read length = "+minL);
else out.println("; WARNING: variable read length = "+minL+"-"+maxL);
if ( HTML ) out.println("<br>");
}
w.write('\t') ;
w.write(col);
if ( data.length == 1 || data.length == 2 && data[1] == null ) {
w.write(".unpaired");
w.write('\t');
w.write(col);
w.write(".unpaired.stddev");
} else {
w.write(".end1");
w.write('\t');
w.write(col);
w.write(".end1.stddev");
w.write('\t') ;
w.write(col);
w.write(".end2");
w.write('\t');
w.write(col);
w.write(".end2.stddev");
}
}
w.write('\n');
int cycle = 0;
Map<String,List<PrimitivePair.Int>> problems = new HashMap<String,List<PrimitivePair.Int>>();
while ( cycle < maxLength ) {
w.write(Integer.toString(cycle+1));
for ( String col : columns ) {
CycleStats[] data = m.get(col);
CycleStats end1 = data[0];
w.write('\t');
if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t.");
else {
double aq = end1.getCycleQualAverage(cycle);
w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle)));
recordProblem(aq,cycle, problems,col+".End1");
}
if ( data.length > 1 && data[1] != null ) {
w.write('\t');
CycleStats end2 = data[1];
if ( end2 == null || cycle >= end2.getMaxReadLength() ) w.write(".\t.");
else {
double aq = end2.getCycleQualAverage(cycle);
w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle)));
recordProblem(aq,cycle, problems,col+".End2");
}
}
}
w.write('\n');
cycle++;
}
w.close();
if ( HTML ) out.println("<hr>");
if ( HTML ) out.println("<br>");
out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):");
if ( HTML ) out.println("<br>");
for ( String col : columns ) {
List<PrimitivePair.Int> lp = problems.get(col+".End1");
out.print(" "+col+" End1:");
if ( lp == null ) {
out.print(" GOOD");
} else {
for ( PrimitivePair.Int p : lp ) {
out.print(" "+(p.first+1)+"-");
if ( p.second >= 0 ) out.print((p.second+1));
else out.print("END");
}
}
out.println();
if ( HTML ) out.println("<br>");
lp = problems.get(col+".End2");
out.print(" "+col+" End2:");
if ( lp == null ) {
out.print(" GOOD");
} else {
for ( PrimitivePair.Int p : lp ) {
out.print(" "+(p.first+1)+"-");
if ( p.second >= 0 ) out.print(p.second);
else out.print("END");
}
}
out.println();
if ( HTML ) out.println("<br>");
}
} catch (IOException ioe) {
throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe);
}
}
private void recordProblem(double q, int cycle, Map<String,List<PrimitivePair.Int>> problems, String name) {
PrimitivePair.Int p = null;
List<PrimitivePair.Int> lp = null;
if ( q < QTHRESHOLD ) { // there is a problem
if ( ! problems.containsKey(name) ) {
lp = new ArrayList<PrimitivePair.Int>();
p = new PrimitivePair.Int(cycle,-1);
lp.add(p);
problems.put(name,lp);
} else {
lp = problems.get(name);
p = lp.get(lp.size()-1);
}
if ( p.second != -1 ) { // if we are not already inside a run of bad qual bases
lp.add(new PrimitivePair.Int(cycle,-1)); // start new run
}
} else { // good base
if ( problems.containsKey(name) ) { // only if we had problem intervals at all, we need to check if the last one needs to be closed
lp = problems.get(name);
p = lp.get(lp.size()-1);
if ( p.second == -1 ) p.second = cycle - 1;
}
}
}
static class CycleStats {
private long readCount = 0;
private double[] cycleQualsAv = null;
private double[] cycleQualsSd = null;
private int minL = 1000000000; // read min. length
private int maxL = 0; // read max. length
public CycleStats(int N) {
readCount = 0;
cycleQualsAv = new double[N];
cycleQualsSd = new double[N];
}
public void add(byte[] quals) {
if ( quals.length > cycleQualsAv.length )
throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length");
if ( quals.length > maxL ) maxL = quals.length;
if ( quals.length < minL ) minL = quals.length;
readCount++;
for ( int i = 0 ; i < quals.length ; i++ ) {
// NOTE: in the update equaltions below, there is no need to check if readCount == 1 (i.e.
// we are initializing with the very first record) or not. Indeed, the arrays are initialized with
// 0; when the very first value arrives, readCount is 1 and cycleQuals[i] gets set to quals[i] (correct!);
// this will also make the second term in the update equation for Sd (quals[i]-cycleQualsAv[i]) equal
// to 0, so Sd will be initially set to 0.
double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance
cycleQualsAv[i] += ( quals[i] - cycleQualsAv[i] ) / readCount; // update mean
cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] );
}
}
public long getReadCount() { return readCount; }
public int getMaxReadLength() { return maxL; }
public int getMinReadLength() { return minL; }
// long [] getCycleQualSums() { return cycleQuals; }
// long getCycleQualSum(int i) { return cycleQuals[i]; }
double getCycleQualAverage(int i) { return cycleQualsAv[i]; }
double getCycleQualStdDev(int i) { return Math.sqrt( cycleQualsSd[i]/(readCount-1) ); }
}
}

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.util.Arrays;
@ -40,7 +40,7 @@ public class PrintLocusContextWalker extends LocusWalker<AlignmentContext, Integ
return lhs + rhs;
}
private String[] getReadNames( List<SAMRecord> reads ) {
private String[] getReadNames( List<GATKSAMRecord> reads ) {
String[] readNames = new String[ reads.size() ];
for( int i = 0; i < reads.size(); i++ ) {
readNames[i] = String.format("%nname = %s, start = %d, end = %d", reads.get(i).getReadName(), reads.get(i).getAlignmentStart(), reads.get(i).getAlignmentEnd());

View File

@ -1,142 +1,142 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* * OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import java.io.PrintStream;
import java.util.Arrays;
/**
* User: depristo
* Date: May 5, 2010
* Time: 12:16:41 PM
*/
/**
* Walks over the input reads, printing out statistics about the read length, number of clipping events, and length
* of the clipping to the output stream.
*/
@Requires({DataSource.READS})
public class ReadClippingStatsWalker extends ReadWalker<ReadClippingStatsWalker.ReadClippingInfo,Integer> {
@Output
protected PrintStream out;
@Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
"on mapped reads only, while unmapped reads will be discarded", required=false)
protected boolean MAPPED_ONLY = true;
@Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false)
protected int SKIP = 1;
// public void initialize() {
//
// }
public class ReadClippingInfo {
SAMReadGroupRecord rg;
int readLength, nClippingEvents, nClippedBases;
}
public ReadClippingInfo map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY)
return null;
ReadClippingInfo info = new ReadClippingInfo();
info.rg = read.getReadGroup();
if ( info.rg == null ) throw new UserException.ReadMissingReadGroup(read);
for ( CigarElement elt : read.getCigar().getCigarElements() ) {
if ( elt.getOperator() != CigarOperator.N )
switch ( elt.getOperator()) {
case H : // ignore hard clips
case S : // soft clip
info.nClippingEvents++;
info.nClippedBases += elt.getLength();
// note the fall through here
case M :
case D : // deletion w.r.t. the reference
case P : // ignore pads
case I : // insertion w.r.t. the reference
info.readLength += elt.getLength(); // Unless we have a reference skip, the read gets longer
break;
case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
break;
default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + elt.getOperator());
}
}
return info; //To change body of implemented methods use File | Settings | File Templates.
}
/**
* Provide an initial value for reduce computations.
*
* @return Initial value of reduce.
*/
public Integer reduceInit() {
out.println(Utils.join(" \t", Arrays.asList("ReadGroup", "ReadLength", "NClippingEvents", "NClippedBases", "PercentClipped")));
return 0;
}
/**
* Reduces a single map with the accumulator provided as the ReduceType.
*
* @param info result of the map.
* @param sum accumulator for the reduce.
* @return accumulator with result of the map taken into account.
*/
public Integer reduce(ReadClippingInfo info, Integer sum) {
if ( info != null ) {
if ( sum % SKIP == 0 ) {
String id = info.rg.getReadGroupId();
out.printf("%s\t %d\t %d\t %d\t %.2f%n",
id, info.readLength, info.nClippingEvents, info.nClippedBases,
100.0 * MathUtils.ratio(info.nClippedBases, info.readLength));
}
return sum + 1; //To change body of implemented methods use File | Settings | File Templates.
} else {
return sum;
}
}
public void onTraversalDone(Integer result) {
}
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* * OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.util.Arrays;
/**
* User: depristo
* Date: May 5, 2010
* Time: 12:16:41 PM
*/
/**
* Walks over the input reads, printing out statistics about the read length, number of clipping events, and length
* of the clipping to the output stream.
*/
/**
 * Walks over the input reads and, for each read that passes the mapped-only filter,
 * emits one tab-separated row per analyzed read: read group id, reconstructed read
 * length, number of clipping events, number of clipped bases, and percent clipped.
 */
@Requires({DataSource.READS})
public class ReadClippingStatsWalker extends ReadWalker<ReadClippingStatsWalker.ReadClippingInfo,Integer> {
    @Output
    protected PrintStream out;

    @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+
            "on mapped reads only, while unmapped reads will be discarded", required=false)
    protected boolean MAPPED_ONLY = true;

    // NOTE(review): despite the argument name, SKIP acts as a sampling period -- only
    // every SKIP-th accepted read is printed in reduce(); all reads still count toward sum.
    @Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false)
    protected int SKIP = 1;

//    public void initialize() {
//
//    }

    /** Per-read clipping summary produced by {@link #map} and consumed by {@link #reduce}. */
    public class ReadClippingInfo {
        SAMReadGroupRecord rg; // read group of the read; guaranteed non-null after map()
        int readLength, nClippingEvents, nClippedBases;
    }

    /**
     * Computes clipping statistics for a single read by walking its CIGAR.
     *
     * @param ref             reference context over the read (unused)
     * @param read            the read to analyze
     * @param metaDataTracker reference-ordered data tracker (unused)
     * @return the clipping summary, or null if the read is unmapped and MAPPED_ONLY is set
     * @throws UserException.ReadMissingReadGroup if the read has no read group
     */
    public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
        if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY)
            return null;

        ReadClippingInfo info = new ReadClippingInfo();
        info.rg = read.getReadGroup();
        if ( info.rg == null ) throw new UserException.ReadMissingReadGroup(read);

        for ( CigarElement elt : read.getCigar().getCigarElements() ) {
            // outer guard is redundant with the "case N" below, but harmless
            if ( elt.getOperator() != CigarOperator.N )
                switch ( elt.getOperator()) {
                    case H : // ignore hard clips
                    case S : // soft clip
                        info.nClippingEvents++;
                        info.nClippedBases += elt.getLength();
                        // note the fall through here
                        // NOTE(review): the fall-through means H and S lengths are also added
                        // to readLength, as are D lengths -- so readLength counts hard-clipped
                        // and deleted bases that are not present in the read sequence itself;
                        // confirm this is the intended definition of "read length" here.
                    case M :
                    case D : // deletion w.r.t. the reference
                    case P : // ignore pads
                    case I : // insertion w.r.t. the reference
                        info.readLength += elt.getLength(); // Unless we have a reference skip, the read gets longer
                        break;
                    case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
                        break;
                    default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + elt.getOperator());
                }
        }
        return info;
    }

    /**
     * Provide an initial value for reduce computations; also prints the table header.
     *
     * @return Initial value of reduce.
     */
    public Integer reduceInit() {
        out.println(Utils.join(" \t", Arrays.asList("ReadGroup", "ReadLength", "NClippingEvents", "NClippedBases", "PercentClipped")));
        return 0;
    }

    /**
     * Reduces a single map with the accumulator provided as the ReduceType.
     * Prints a row for every SKIP-th non-null result; null results (filtered reads)
     * do not advance the counter.
     *
     * @param info result of the map.
     * @param sum accumulator for the reduce.
     * @return accumulator with result of the map taken into account.
     */
    public Integer reduce(ReadClippingInfo info, Integer sum) {
        if ( info != null ) {
            if ( sum % SKIP == 0 ) {
                String id = info.rg.getReadGroupId();
                out.printf("%s\t %d\t %d\t %d\t %.2f%n",
                        id, info.readLength, info.nClippingEvents, info.nClippedBases,
                        100.0 * MathUtils.ratio(info.nClippedBases, info.readLength));
            }
            return sum + 1;
        } else {
            return sum;
        }
    }

    /** No end-of-traversal summary is produced. */
    public void onTraversalDone(Integer result) {
    }
}

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@ -64,21 +65,23 @@ public class ReadValidationWalker extends ReadWalker<SAMRecord, SAMRecord> {
/**
* The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
return true;
}
/**
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
}

View File

@ -364,11 +364,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
/**
* For each base in the read calculate a new recalibrated quality score and replace the quality scores in the read
*
* @param refBases References bases over the length of the read
* @param read The read to be recalibrated
* @return The read with quality scores replaced
*/
public SAMRecord map( ReferenceContext refBases, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public SAMRecord map( ReferenceContext refBases, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
if( read.getReadLength() == 0 ) { // Some reads have '*' as the SEQ field and samtools returns length zero. We don't touch these reads.
return read;

View File

@ -4,9 +4,9 @@ import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
import java.util.Stack;
@ -39,14 +39,14 @@ public class ClippingOp {
* @param algorithm
* @param read
*/
public SAMRecord apply(ClippingRepresentation algorithm, SAMRecord read) {
public GATKSAMRecord apply(ClippingRepresentation algorithm, GATKSAMRecord read) {
byte[] quals = read.getBaseQualities();
byte[] bases = read.getReadBases();
switch (algorithm) {
// important note:
// it's not safe to call read.getReadBases()[i] = 'N' or read.getBaseQualities()[i] = 0
// because you're not guaranteed to get a pointer to the actual array of bytes in the SAMRecord
// because you're not guaranteed to get a pointer to the actual array of bytes in the GATKSAMRecord
case WRITE_NS:
for (int i = start; i <= stop; i++)
bases[i] = 'N';
@ -248,9 +248,9 @@ public class ClippingOp {
}
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() - 1)
return new SAMRecord(read.getHeader());
return new GATKSAMRecord(read.getHeader());
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop);
@ -265,9 +265,9 @@ public class ClippingOp {
System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
SAMRecord hardClippedRead;
GATKSAMRecord hardClippedRead;
try {
hardClippedRead = (SAMRecord) read.clone();
hardClippedRead = (GATKSAMRecord) read.clone();
} catch (CloneNotSupportedException e) {
throw new ReviewedStingException("Where did the clone go?");
}

View File

@ -3,8 +3,8 @@ package org.broadinstitute.sting.utils.clipreads;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
@ -14,7 +14,7 @@ import java.util.List;
* A simple collection of the clipping operations to apply to a read along with its read
*/
public class ReadClipper {
SAMRecord read;
GATKSAMRecord read;
boolean wasClipped;
List<ClippingOp> ops = null;
@ -23,7 +23,7 @@ public class ReadClipper {
*
* @param read
*/
public ReadClipper(final SAMRecord read) {
public ReadClipper(final GATKSAMRecord read) {
this.read = read;
this.wasClipped = false;
}
@ -46,19 +46,19 @@ public class ReadClipper {
return wasClipped;
}
public SAMRecord getRead() {
public GATKSAMRecord getRead() {
return read;
}
public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) {
return hardClipByReferenceCoordinates(-1, refStop);
}
public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
public GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) {
return hardClipByReferenceCoordinates(refStart, -1);
}
private int numDeletions(SAMRecord read) {
private int numDeletions(GATKSAMRecord read) {
int result = 0;
for (CigarElement e: read.getCigar().getCigarElements()) {
if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D )
@ -67,7 +67,7 @@ public class ReadClipper {
return result;
}
protected SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
@ -78,32 +78,32 @@ public class ReadClipper {
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
this.addOp(new ClippingOp(start, stop));
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
return clippedRead;
}
public SAMRecord hardClipByReadCoordinates(int start, int stop) {
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
@Requires("left <= right")
public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
if (left == right)
return new SAMRecord(read.getHeader());
SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
return new GATKSAMRecord(read.getHeader());
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
// make the left cut index no longer part of the read. In that case, clip the read entirely.
if (left > leftTailRead.getAlignmentEnd())
return new SAMRecord(read.getHeader());
return new GATKSAMRecord(read.getHeader());
ReadClipper clipper = new ReadClipper(leftTailRead);
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
}
public SAMRecord hardClipLowQualEnds(byte lowQual) {
public GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
byte [] quals = read.getBaseQualities();
int leftClipIndex = 0;
int rightClipIndex = read.getReadLength() - 1;
@ -114,7 +114,7 @@ public class ReadClipper {
// if the entire read should be clipped, then return an empty read. (--todo: maybe null is better? testing this for now)
if (leftClipIndex > rightClipIndex)
return (new SAMRecord(read.getHeader()));
return (new GATKSAMRecord(read.getHeader()));
if (rightClipIndex < read.getReadLength() - 1) {
this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1));
@ -125,7 +125,7 @@ public class ReadClipper {
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public SAMRecord hardClipSoftClippedBases () {
public GATKSAMRecord hardClipSoftClippedBases () {
int readIndex = 0;
int cutLeft = -1; // first position to hard clip (inclusive)
int cutRight = -1; // first position to hard clip (inclusive)
@ -164,12 +164,12 @@ public class ReadClipper {
* @param algorithm
* @return
*/
public SAMRecord clipRead(ClippingRepresentation algorithm) {
public GATKSAMRecord clipRead(ClippingRepresentation algorithm) {
if (ops == null)
return getRead();
else {
try {
SAMRecord clippedRead = (SAMRecord) read.clone();
GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
for (ClippingOp op : getOps()) {
clippedRead = op.apply(algorithm, clippedRead);
}
@ -181,7 +181,7 @@ public class ReadClipper {
}
}
public SAMRecord hardClipLeadingInsertions() {
public GATKSAMRecord hardClipLeadingInsertions() {
for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.duplicates;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -35,27 +34,28 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.List;
public class DupUtils {
private static SAMRecord tmpCopyRead(SAMRecord read) {
private static GATKSAMRecord tmpCopyRead(GATKSAMRecord read) {
try {
return (SAMRecord)read.clone();
return (GATKSAMRecord)read.clone();
} catch ( CloneNotSupportedException e ) {
throw new ReviewedStingException("Unexpected Clone failure!");
}
}
public static SAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int maxQScore) {
public static GATKSAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int maxQScore) {
if ( duplicates.size() == 0 )
return null;
// make the combined read by copying the first read and setting the
// bases and quals to new arrays
SAMRecord comb = tmpCopyRead(duplicates.get(0));
//SAMRecord comb = tmpCopyRead(duplicates.get(0));
GATKSAMRecord comb = tmpCopyRead(duplicates.get(0));
//GATKSAMRecord comb = tmpCopyRead(duplicates.get(0));
comb.setDuplicateReadFlag(false);
int readLen = comb.getReadBases().length;
byte[] bases = new byte[readLen];
@ -63,7 +63,7 @@ public class DupUtils {
for ( int i = 0; i < readLen; i++ ) {
//System.out.printf("I is %d%n", i);
//for ( SAMRecord read : duplicates ) {
//for ( GATKSAMRecord read : duplicates ) {
// System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]);
//}
Pair<Byte, Byte> baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore);
@ -117,7 +117,7 @@ public class DupUtils {
System.out.printf("%n");
}
private static Pair<Byte, Byte> combineBaseProbs(GenomeLocParser genomeLocParser,List<SAMRecord> duplicates, int readOffset, int maxQScore) {
private static Pair<Byte, Byte> combineBaseProbs(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int readOffset, int maxQScore) {
GenomeLoc loc = genomeLocParser.createGenomeLoc(duplicates.get(0));
ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset);

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.fragments;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
@ -35,17 +36,17 @@ public class FragmentUtils {
* @param <T>
*/
public interface ReadGetter<T> {
public SAMRecord get(T object);
public GATKSAMRecord get(T object);
}
/** Identify getter for SAMRecords themselves */
private final static ReadGetter<SAMRecord> SamRecordGetter = new ReadGetter<SAMRecord>() {
@Override public SAMRecord get(final SAMRecord object) { return object; }
private final static ReadGetter<GATKSAMRecord> SamRecordGetter = new ReadGetter<GATKSAMRecord>() {
@Override public GATKSAMRecord get(final GATKSAMRecord object) { return object; }
};
/** Gets the SAMRecord in a PileupElement */
private final static ReadGetter<PileupElement> PileupElementGetter = new ReadGetter<PileupElement>() {
@Override public SAMRecord get(final PileupElement object) { return object.getRead(); }
@Override public GATKSAMRecord get(final PileupElement object) { return object.getRead(); }
};
@ -116,7 +117,7 @@ public class FragmentUtils {
return create(rbp, rbp.getNumberOfElements(), PileupElementGetter);
}
public final static FragmentCollection<SAMRecord> create(List<SAMRecord> reads) {
public final static FragmentCollection<GATKSAMRecord> create(List<GATKSAMRecord> reads) {
return create(reads, reads.size(), SamRecordGetter);
}

View File

@ -24,13 +24,13 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
@ -59,12 +59,12 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
* @param reads
* @param offsets
*/
public AbstractReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets ) {
this.loc = loc;
this.pileupElementTracker = readsOffsets2Pileup(reads,offsets);
}
public AbstractReadBackedPileup(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
public AbstractReadBackedPileup(GenomeLoc loc, List<GATKSAMRecord> reads, int offset ) {
this.loc = loc;
this.pileupElementTracker = readsOffsets2Pileup(reads,offset);
}
@ -167,7 +167,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
* @param offsets
* @return
*/
private PileupElementTracker<PE> readsOffsets2Pileup(List<SAMRecord> reads, List<Integer> offsets ) {
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, List<Integer> offsets ) {
if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
if ( offsets == null ) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup");
if ( reads.size() != offsets.size() ) throw new ReviewedStingException("Reads and offset lists have different sizes!");
@ -187,7 +187,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
* @param offset
* @return
*/
private PileupElementTracker<PE> readsOffsets2Pileup(List<SAMRecord> reads, int offset ) {
private PileupElementTracker<PE> readsOffsets2Pileup(List<GATKSAMRecord> reads, int offset ) {
if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
if ( offset < 0 ) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup");
@ -200,7 +200,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
protected abstract AbstractReadBackedPileup<RBP,PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
protected abstract PE createNewPileupElement(SAMRecord read, int offset);
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset);
// --------------------------------------------------------
//
@ -512,7 +512,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE p: pileupElementTracker) {
SAMRecord read = p.getRead();
GATKSAMRecord read = p.getRead();
if(targetReadGroupId != null) {
if(read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId()))
filteredTracker.add(p);
@ -543,7 +543,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE p: pileupElementTracker) {
SAMRecord read = p.getRead();
GATKSAMRecord read = p.getRead();
if(laneID != null) {
if(read.getReadGroup() != null &&
(read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different
@ -567,7 +567,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
else {
Collection<String> sampleNames = new HashSet<String>();
for(PileupElement p: this) {
SAMRecord read = p.getRead();
GATKSAMRecord read = p.getRead();
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
sampleNames.add(sampleName);
}
@ -644,7 +644,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
HashSet<String> hashSampleNames = new HashSet<String>(sampleNames); // to speed up the "contains" access in the for loop
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE p: pileupElementTracker) {
SAMRecord read = p.getRead();
GATKSAMRecord read = p.getRead();
if(sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else.
if(read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample()))
filteredTracker.add(p);
@ -669,7 +669,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE p: pileupElementTracker) {
SAMRecord read = p.getRead();
GATKSAMRecord read = p.getRead();
if(sampleName != null) {
if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
filteredTracker.add(p);
@ -824,8 +824,8 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
* @return
*/
@Override
public List<SAMRecord> getReads() {
List<SAMRecord> reads = new ArrayList<SAMRecord>(getNumberOfElements());
public List<GATKSAMRecord> getReads() {
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(getNumberOfElements());
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
return reads;
}

View File

@ -1,131 +1,132 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import java.util.Arrays;
/**
* In the "standard" locus traversal mode,
 * the traversal is performed strictly over the reference bases. Thus, only pileups of bases (and hence local events
* such as point mutations) are "seen" at every invocation of the walker's map() function at every (genomic) locus. Deletions
* are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted
* in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept.
* The insertions that may be present in some reads are not seen at all in such strict reference traversal mode.
*
* By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e.
* last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is
* an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice:
* first call will be performed in a "standard" mode, with a pileup of bases over the position Z, and then the additional
* call will be made at the same position with a pileup of event/noevent calls, where events are extended and contain
* full information about insertions/deletions. Then the next, "standard", call to map() will be performed at the next
* (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at
* Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed
* only once, at the position where the indel maps (starts).
*
 * This class wraps an "extended" event (indel) so that it can be added to a pileup of events at a given location.
*
* Created by IntelliJ IDEA.
* User: asivache
* Date: Dec 21, 2009
* Time: 2:57:55 PM
* To change this template use File | Settings | File Templates.
*/
public class ExtendedEventPileupElement extends PileupElement {
// Kind of extended event carried by this element.
public enum Type {
NOEVENT, DELETION, INSERTION
}
private Type type = null;
private int eventLength = -1;
private String eventBases = null; // if it is a deletion, we do not have information about the actual deleted bases
// in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
private SAMRecord read;
private int offset; // position in the read immediately BEFORE the event
// This is broken! offset is always zero because these member variables are shadowed by base class
// NOTE(review): 'read' and 'offset' above hide PileupElement's own fields; the super constructor
// initializes the base-class fields, while these hiding copies are never assigned. As a result
// the unqualified 'offset' used in getBase()/getBaseIndex()/getQual() below always reads 0.
// Removing these two field declarations should fix it — confirm against the base class.
/** Constructor for extended pileup element (indel).
*
* @param read the read, in which the indel is observed
* @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion)
* @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT)
* @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent)
*/
public ExtendedEventPileupElement( SAMRecord read, int offset, int length, byte[] eventBases ) {
super(read, offset);
this.eventLength = length;
if ( length <= 0 ) type = Type.NOEVENT;
else {
if ( eventBases != null ) {
this.eventBases = new String(eventBases).toUpperCase();
type = Type.INSERTION;
} else {
type = Type.DELETION;
}
}
}
/** Constructor for deletion or noevent calls - does not take event bases as an argument (as those should
* be null or are ignored in these cases anyway)
* @param read the read in which the event is observed
* @param offset position in the read immediately before the event
* @param length number of deleted bases; length <= 0 means NOEVENT
*/
public ExtendedEventPileupElement( SAMRecord read, int offset, int length ) {
this(read,offset, length, null);
}
/** @return true if this element represents a deletion */
public boolean isDeletion() {
return type == Type.DELETION;
}
/** @return true if this element represents an insertion */
public boolean isInsertion() {
return type == Type.INSERTION;
}
/** @return true if this element represents either an insertion or a deletion */
public boolean isIndel() {
return isDeletion() || isInsertion();
}
public Type getType() { return type; }
// The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with
// a valid base quality. The following code attempts to compensate for that.
@Override
public byte getBase() {
return getBase(offset >= 0 ? offset : offset+eventLength);
}
@Override
public int getBaseIndex() {
return getBaseIndex(offset >= 0 ? offset : offset+eventLength);
}
@Override
public byte getQual() {
return getQual(offset >= 0 ? offset : offset+eventLength);
}
/** Returns length of the event (number of inserted or deleted bases) */
public int getEventLength() { return eventLength; }
/** Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
* */
public String getEventBases() { return eventBases; }
// Renders the element as "readName @ offset = <c><bases> MQ<mq>" where c is '.' (no event),
// '-' (deletion, bases rendered as a run of 'D's since they are unknown) or '+' (insertion).
@Override
public String toString() {
char c = '.';
String fillStr = null ;
if ( isDeletion() ) {
c = '-';
char [] filler = new char[eventLength];
Arrays.fill(filler, 'D');
fillStr = new String(filler);
}
else if ( isInsertion() ) c = '+';
return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()?
(isInsertion() ? eventBases : fillStr ): "", getMappingQual());
}
}
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
/**
* In the "standard" locus traversal mode,
 * the traversal is performed strictly over the reference bases. Thus, only pileups of bases (and hence local events
* such as point mutations) are "seen" at every invocation of the walker's map() function at every (genomic) locus. Deletions
* are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted
* in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept.
* The insertions that may be present in some reads are not seen at all in such strict reference traversal mode.
*
* By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e.
* last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is
* an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice:
* first call will be performed in a "standard" mode, with a pileup of bases over the position Z, and then the additional
* call will be made at the same position with a pileup of event/noevent calls, where events are extended and contain
* full information about insertions/deletions. Then the next, "standard", call to map() will be performed at the next
* (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at
* Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed
* only once, at the position where the indel maps (starts).
*
 * This class wraps an "extended" event (indel) so that it can be added to a pileup of events at a given location.
*
* Created by IntelliJ IDEA.
* User: asivache
* Date: Dec 21, 2009
* Time: 2:57:55 PM
* To change this template use File | Settings | File Templates.
*/
public class ExtendedEventPileupElement extends PileupElement {

    /** Kind of extended event carried by this element. */
    public enum Type {
        NOEVENT, DELETION, INSERTION
    }

    private Type type = null;
    private int eventLength = -1;
    // For insertions, the actual inserted bases (upper-cased). For deletions we have no
    // information about the deleted bases in the read itself (they are rendered as 'D's
    // in toString()), so this stays null.
    private String eventBases = null;

    // FIX: the previous revision re-declared 'read' and 'offset' here, hiding the base-class
    // fields that the super constructor initializes. The hiding copies were never assigned,
    // so 'offset' always read as 0 in getBase()/getBaseIndex()/getQual() (as the old inline
    // comment itself noted). The hiding fields have been removed so the inherited, properly
    // initialized PileupElement fields are used instead.

    /**
     * Constructor for an extended pileup element (indel).
     *
     * @param read       the read in which the indel is observed
     * @param offset     position in the read immediately before the indel (can be -1 if the read starts with an insertion)
     * @param length     length of the indel (number of inserted or deleted bases); length <= 0 indicates that the read has no indel (NOEVENT)
     * @param eventBases inserted bases; null indicates that the event is a deletion; ignored if length <= 0 (noevent)
     */
    public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length, byte[] eventBases ) {
        super(read, offset);
        this.eventLength = length;
        if ( length <= 0 ) type = Type.NOEVENT;
        else {
            if ( eventBases != null ) {
                this.eventBases = new String(eventBases).toUpperCase();
                type = Type.INSERTION;
            } else {
                type = Type.DELETION;
            }
        }
    }

    /**
     * Constructor for deletion or noevent calls - does not take event bases as an argument
     * (as those should be null or are ignored in these cases anyway).
     *
     * @param read   the read in which the event is observed
     * @param offset position in the read immediately before the event
     * @param length number of deleted bases; length <= 0 means NOEVENT
     */
    public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length ) {
        this(read, offset, length, null);
    }

    /** @return true if this element represents a deletion */
    public boolean isDeletion() {
        return type == Type.DELETION;
    }

    /** @return true if this element represents an insertion */
    public boolean isInsertion() {
        return type == Type.INSERTION;
    }

    /** @return true if this element represents either an insertion or a deletion */
    public boolean isIndel() {
        return isDeletion() || isInsertion();
    }

    public Type getType() { return type; }

    // The offset can be negative with insertions at the start of the read, but a valid base does
    // exist at this position with a valid base quality. The following code attempts to compensate
    // for that by shifting past the event when offset < 0.
    @Override
    public byte getBase() {
        return getBase(offset >= 0 ? offset : offset + eventLength);
    }

    @Override
    public int getBaseIndex() {
        return getBaseIndex(offset >= 0 ? offset : offset + eventLength);
    }

    @Override
    public byte getQual() {
        return getQual(offset >= 0 ? offset : offset + eventLength);
    }

    /** Returns length of the event (number of inserted or deleted bases). */
    public int getEventLength() { return eventLength; }

    /** Returns actual sequence of inserted bases, or null if the event is a deletion or if there is no event in the associated read. */
    public String getEventBases() { return eventBases; }

    // Renders the element as "readName @ offset = <c><bases> MQ<mq>" where c is '.' (no event),
    // '-' (deletion; unknown deleted bases shown as a run of 'D's) or '+' (insertion).
    @Override
    public String toString() {
        char c = '.';
        String fillStr = null;
        if ( isDeletion() ) {
            c = '-';
            char[] filler = new char[eventLength];
            Arrays.fill(filler, 'D');
            fillStr = new String(filler);
        }
        else if ( isInsertion() ) c = '+';
        return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()?
                (isInsertion() ? eventBases : fillStr ): "", getMappingQual());
    }
}

View File

@ -2,10 +2,8 @@ package org.broadinstitute.sting.utils.pileup;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
/**
* Created by IntelliJ IDEA.
@ -21,14 +19,14 @@ public class PileupElement implements Comparable<PileupElement> {
public static final byte T_FOLLOWED_BY_INSERTION_BASE = (byte) 89;
public static final byte G_FOLLOWED_BY_INSERTION_BASE = (byte) 90;
protected final SAMRecord read;
protected final GATKSAMRecord read;
protected final int offset;
@Requires({
"read != null",
"offset >= -1",
"offset <= read.getReadLength()"})
public PileupElement( SAMRecord read, int offset ) {
public PileupElement( GATKSAMRecord read, int offset ) {
this.read = read;
this.offset = offset;
}
@ -38,7 +36,7 @@ public class PileupElement implements Comparable<PileupElement> {
}
@Ensures("result != null")
public SAMRecord getRead() { return read; }
public GATKSAMRecord getRead() { return read; }
@Ensures("result == offset")
public int getOffset() { return offset; }

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Collection;
import java.util.List;
@ -166,7 +167,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
*/
public List<SAMRecord> getReads();
public List<GATKSAMRecord> getReads();
/**
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time

View File

@ -23,10 +23,10 @@
*/
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
@ -95,7 +95,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
}
@Override
protected ExtendedEventPileupElement createNewPileupElement(SAMRecord read, int offset) {
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
}

View File

@ -24,10 +24,10 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Collection;
import java.util.List;
@ -202,7 +202,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
* @return
*/
public List<SAMRecord> getReads();
public List<GATKSAMRecord> getReads();
/**
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time

View File

@ -23,8 +23,8 @@
*/
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
import java.util.Map;
@ -35,11 +35,11 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
super(loc);
}
public ReadBackedPileupImpl(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets ) {
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, List<Integer> offsets ) {
super(loc,reads,offsets);
}
public ReadBackedPileupImpl(GenomeLoc loc, List<SAMRecord> reads, int offset ) {
public ReadBackedPileupImpl(GenomeLoc loc, List<GATKSAMRecord> reads, int offset ) {
super(loc,reads,offset);
}
@ -70,7 +70,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
}
@Override
protected PileupElement createNewPileupElement(SAMRecord read, int offset) {
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
return new PileupElement(read,offset);
}
}

View File

@ -104,9 +104,9 @@ public class ArtificialReadsTraversal<M,T> extends TraversalEngine<M,T,Walker<M,
// an array of characters that represent the reference
ReferenceContext refSeq = null;
final boolean keepMeP = readWalker.filter(refSeq, read);
final boolean keepMeP = readWalker.filter(refSeq, (GATKSAMRecord) read);
if (keepMeP) {
M x = readWalker.map(refSeq, read, null); // TODO: fix me at some point, it would be nice to fake out ROD data too
M x = readWalker.map(refSeq, (GATKSAMRecord) read, null); // TODO: fix me at some point, it would be nice to fake out ROD data too
sum = readWalker.reduce(x, sum);
}
}

View File

@ -3,7 +3,6 @@ package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -201,9 +200,9 @@ public class ArtificialSAMUtils {
return rec;
}
public final static List<SAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
public final static List<GATKSAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
left.setReadPairedFlag(true);
right.setReadPairedFlag(true);
@ -327,9 +326,9 @@ public class ArtificialSAMUtils {
if ( rightStart <= 0 ) continue;
List<SAMRecord> pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
final SAMRecord left = pair.get(0);
final SAMRecord right = pair.get(1);
List<GATKSAMRecord> pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
final GATKSAMRecord left = pair.get(0);
final GATKSAMRecord right = pair.get(1);
pileupElements.add(new PileupElement(left, pos - leftStart));

View File

@ -188,15 +188,15 @@ public class ReadUtils {
* This makes the following code a little nasty, since we can only detect if a base is in the adaptor, but not
* if it overlaps the read.
*
* @param rec
* @param read
* @param basePos
* @param adaptorLength
* @return
*/
public static OverlapType readPairBaseOverlapType(final SAMRecord rec, long basePos, final int adaptorLength) {
public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos, final int adaptorLength) {
OverlapType state = OverlapType.NOT_OVERLAPPING;
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength);
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
if ( adaptorBoundaries != null ) { // we're not an unmapped pair -- cannot filter out
@ -205,28 +205,28 @@ public class ReadUtils {
if ( inAdapator ) {
state = OverlapType.IN_ADAPTOR;
//System.out.printf("baseOverlapState: %50s negStrand=%b base=%d start=%d stop=%d, adaptorStart=%d adaptorEnd=%d isize=%d => %s%n",
// rec.getReadName(), rec.getReadNegativeStrandFlag(), basePos, rec.getAlignmentStart(), rec.getAlignmentEnd(), adaptorBoundaries.first, adaptorBoundaries.second, rec.getInferredInsertSize(), state);
// read.getReadName(), read.getReadNegativeStrandFlag(), basePos, read.getAlignmentStart(), read.getAlignmentEnd(), adaptorBoundaries.first, adaptorBoundaries.second, read.getInferredInsertSize(), state);
}
}
return state;
}
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord rec, int adaptorLength) {
int isize = rec.getInferredInsertSize();
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord read, int adaptorLength) {
int isize = read.getInferredInsertSize();
if ( isize == 0 )
return null; // don't worry about unmapped pairs
int adaptorStart, adaptorEnd;
if ( rec.getReadNegativeStrandFlag() ) {
if ( read.getReadNegativeStrandFlag() ) {
// we are on the negative strand, so our mate is on the positive strand
int mateStart = rec.getMateAlignmentStart();
int mateStart = read.getMateAlignmentStart();
adaptorStart = mateStart - adaptorLength - 1;
adaptorEnd = mateStart - 1;
} else {
// we are on the positive strand, so our mate is on the negative strand
int mateEnd = rec.getAlignmentStart() + isize - 1;
int mateEnd = read.getAlignmentStart() + isize - 1;
adaptorStart = mateEnd + 1;
adaptorEnd = mateEnd + adaptorLength;
}
@ -236,47 +236,47 @@ public class ReadUtils {
/**
*
* @param rec original SAM record
* @param read original SAM record
* @param adaptorLength length of adaptor sequence
* @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped
*/
public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) {
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) {
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength);
GATKSAMRecord result = (GATKSAMRecord)rec;
Pair<Integer, Integer> adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength);
GATKSAMRecord result = (GATKSAMRecord)read;
if ( adaptorBoundaries != null ) {
if ( rec.getReadNegativeStrandFlag() && adaptorBoundaries.second >= rec.getAlignmentStart() && adaptorBoundaries.first < rec.getAlignmentEnd() )
result = hardClipStartOfRead(rec, adaptorBoundaries.second);
else if ( !rec.getReadNegativeStrandFlag() && adaptorBoundaries.first <= rec.getAlignmentEnd() )
result = hardClipEndOfRead(rec, adaptorBoundaries.first);
if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() )
result = hardClipStartOfRead(read, adaptorBoundaries.second);
else if ( !read.getReadNegativeStrandFlag() && adaptorBoundaries.first <= read.getAlignmentEnd() )
result = hardClipEndOfRead(read, adaptorBoundaries.first);
}
return result;
}
// return true if the read needs to be completely clipped
private static GATKSAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) {
private static GATKSAMRecord hardClipStartOfRead(GATKSAMRecord oldRec, int stopPosition) {
if ( stopPosition >= oldRec.getAlignmentEnd() ) {
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
//System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName());
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
return null;
}
GATKSAMRecord rec;
GATKSAMRecord read;
try {
rec = (GATKSAMRecord)oldRec.clone();
read = (GATKSAMRecord)oldRec.clone();
} catch (Exception e) {
return null;
}
//System.out.printf("Clipping start of read: %50s start=%d adaptorEnd=%d isize=%d %n",
// rec.getReadName(), rec.getAlignmentStart(), stopPosition, rec.getInferredInsertSize());
// read.getReadName(), read.getAlignmentStart(), stopPosition, read.getInferredInsertSize());
Cigar oldCigar = rec.getCigar();
Cigar oldCigar = read.getCigar();
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
int currentPos = rec.getAlignmentStart();
int currentPos = read.getAlignmentStart();
int basesToClip = 0;
int basesAlreadyClipped = 0;
@ -315,48 +315,48 @@ public class ReadUtils {
}
// copy over the unclipped bases
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
int newLength = bases.length - basesToClip;
byte[] newBases = new byte[newLength];
byte[] newQuals = new byte[newLength];
System.arraycopy(bases, basesToClip, newBases, 0, newLength);
System.arraycopy(quals, basesToClip, newQuals, 0, newLength);
rec.setReadBases(newBases);
rec.setBaseQualities(newQuals);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases
newCigarElements.addFirst(new CigarElement(basesToClip + basesAlreadyClipped, CigarOperator.H));
Cigar newCigar = new Cigar(newCigarElements);
rec.setCigar(newCigar);
read.setCigar(newCigar);
// adjust the start accordingly
rec.setAlignmentStart(stopPosition + 1);
read.setAlignmentStart(stopPosition + 1);
return rec;
return read;
}
private static GATKSAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) {
private static GATKSAMRecord hardClipEndOfRead(GATKSAMRecord oldRec, int startPosition) {
if ( startPosition <= oldRec.getAlignmentStart() ) {
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
//System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName());
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
return null;
}
GATKSAMRecord rec;
GATKSAMRecord read;
try {
rec = (GATKSAMRecord)oldRec.clone();
read = (GATKSAMRecord)oldRec.clone();
} catch (Exception e) {
return null;
}
//System.out.printf("Clipping end of read: %50s adaptorStart=%d end=%d isize=%d %n",
// rec.getReadName(), startPosition, rec.getAlignmentEnd(), rec.getInferredInsertSize());
// read.getReadName(), startPosition, read.getAlignmentEnd(), read.getInferredInsertSize());
Cigar oldCigar = rec.getCigar();
Cigar oldCigar = read.getCigar();
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
int currentPos = rec.getAlignmentStart();
int currentPos = read.getAlignmentStart();
int basesToKeep = 0;
int basesAlreadyClipped = 0;
@ -402,41 +402,41 @@ public class ReadUtils {
}
// copy over the unclipped bases
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
byte[] newBases = new byte[basesToKeep];
byte[] newQuals = new byte[basesToKeep];
System.arraycopy(bases, 0, newBases, 0, basesToKeep);
System.arraycopy(quals, 0, newQuals, 0, basesToKeep);
rec.setReadBases(newBases);
rec.setBaseQualities(newQuals);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases
newCigarElements.add(new CigarElement((bases.length - basesToKeep) + basesAlreadyClipped, CigarOperator.H));
Cigar newCigar = new Cigar(newCigarElements);
rec.setCigar(newCigar);
read.setCigar(newCigar);
// adjust the stop accordingly
// rec.setAlignmentEnd(startPosition - 1);
// read.setAlignmentEnd(startPosition - 1);
return rec;
return read;
}
/**
* Hard clips away (i.e.g, removes from the read) bases that were previously soft clipped.
*
* @param rec
* @param read
* @return
*/
@Requires("rec != null")
@Requires("read != null")
@Ensures("result != null")
public static SAMRecord hardClipSoftClippedBases(SAMRecord rec) {
List<CigarElement> cigarElts = rec.getCigar().getCigarElements();
public static GATKSAMRecord hardClipSoftClippedBases(GATKSAMRecord read) {
List<CigarElement> cigarElts = read.getCigar().getCigarElements();
if ( cigarElts.size() == 1 ) // can't be soft clipped, just return
return rec;
return read;
int keepStart = 0, keepEnd = rec.getReadLength() - 1;
int keepStart = 0, keepEnd = read.getReadLength() - 1;
List<CigarElement> newCigarElements = new LinkedList<CigarElement>();
for ( int i = 0; i < cigarElts.size(); i++ ) {
@ -447,7 +447,7 @@ public class ReadUtils {
if ( i == 0 )
keepStart = l;
else
keepEnd = rec.getReadLength() - l - 1;
keepEnd = read.getReadLength() - l - 1;
newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP));
break;
@ -477,54 +477,54 @@ public class ReadUtils {
}
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
return hardClipBases(rec, keepStart, keepEnd, mergedCigarElements);
return hardClipBases(read, keepStart, keepEnd, mergedCigarElements);
}
/**
* Hard clips out the bases in rec, keeping the bases from keepStart to keepEnd, inclusive. Note these
* Hard clips out the bases in read, keeping the bases from keepStart to keepEnd, inclusive. Note these
* are offsets, so they are 0 based
*
* @param rec
* @param read
* @param keepStart
* @param keepEnd
* @param newCigarElements
* @return
*/
@Requires({
"rec != null",
"read != null",
"keepStart >= 0",
"keepEnd < rec.getReadLength()",
"rec.getReadUnmappedFlag() || newCigarElements != null"})
"keepEnd < read.getReadLength()",
"read.getReadUnmappedFlag() || newCigarElements != null"})
@Ensures("result != null")
public static SAMRecord hardClipBases(SAMRecord rec, int keepStart, int keepEnd, List<CigarElement> newCigarElements) {
public static GATKSAMRecord hardClipBases(GATKSAMRecord read, int keepStart, int keepEnd, List<CigarElement> newCigarElements) {
int newLength = keepEnd - keepStart + 1;
if ( newLength != rec.getReadLength() ) {
if ( newLength != read.getReadLength() ) {
try {
rec = SimplifyingSAMFileWriter.simplifyRead((SAMRecord)rec.clone());
read = (GATKSAMRecord)read.clone();
// copy over the unclipped bases
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
byte[] newBases = new byte[newLength];
byte[] newQuals = new byte[newLength];
System.arraycopy(bases, keepStart, newBases, 0, newLength);
System.arraycopy(quals, keepStart, newQuals, 0, newLength);
rec.setReadBases(newBases);
rec.setBaseQualities(newQuals);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases, if the read isn't unmapped
if ( ! rec.getReadUnmappedFlag() ) {
if ( ! read.getReadUnmappedFlag() ) {
Cigar newCigar = new Cigar(newCigarElements);
rec.setCigar(newCigar);
read.setCigar(newCigar);
}
} catch ( CloneNotSupportedException e ) {
throw new ReviewedStingException("WTF, where did clone go?", e);
}
}
return rec;
return read;
}
public static SAMRecord replaceSoftClipsWithMatches(SAMRecord read) {
public static GATKSAMRecord replaceSoftClipsWithMatches(GATKSAMRecord read) {
List<CigarElement> newCigarElements = new ArrayList<CigarElement>();
for ( CigarElement ce : read.getCigar().getCigarElements() ) {
@ -561,15 +561,15 @@ public class ReadUtils {
/**
*
* @param rec original SAM record
* @param read original SAM record
* @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped
*/
public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec) {
return hardClipAdaptorSequence(rec, DEFAULT_ADAPTOR_SIZE);
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read) {
return hardClipAdaptorSequence(read, DEFAULT_ADAPTOR_SIZE);
}
public static OverlapType readPairBaseOverlapType(final SAMRecord rec, long basePos) {
return readPairBaseOverlapType(rec, basePos, DEFAULT_ADAPTOR_SIZE);
public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos) {
return readPairBaseOverlapType(read, basePos, DEFAULT_ADAPTOR_SIZE);
}
public static boolean is454Read(SAMRecord read) {
@ -601,10 +601,10 @@ public class ReadUtils {
readFlagNames.put(0x400, "Duplicate");
}
public static String readFlagsAsString(SAMRecord rec) {
public static String readFlagsAsString(GATKSAMRecord read) {
String flags = "";
for (int flag : readFlagNames.keySet()) {
if ((rec.getFlags() & flag) != 0) {
if ((read.getFlags() & flag) != 0) {
flags += readFlagNames.get(flag) + " ";
}
}
@ -618,7 +618,7 @@ public class ReadUtils {
* @param reads
* @return
*/
public final static List<SAMRecord> coordinateSortReads(List<SAMRecord> reads) {
public final static List<GATKSAMRecord> coordinateSortReads(List<GATKSAMRecord> reads) {
final SAMRecordComparator comparer = new SAMRecordCoordinateComparator();
Collections.sort(reads, comparer);
return reads;
@ -647,7 +647,7 @@ public class ReadUtils {
* @param interval the interval
* @return the overlap type as described by ReadAndIntervalOverlap enum (see above)
*/
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(GATKSAMRecord read, GenomeLoc interval) {
int sStart = getRefCoordSoftUnclippedStart(read);
int sStop = getRefCoordSoftUnclippedEnd(read);
@ -685,7 +685,7 @@ public class ReadUtils {
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) {
int start = read.getUnclippedStart();
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
@ -697,7 +697,7 @@ public class ReadUtils {
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
public static int getRefCoordSoftUnclippedEnd(GATKSAMRecord read) {
int stop = read.getUnclippedStart();
if (readIsEntirelyInsertion(read))
@ -716,7 +716,7 @@ public class ReadUtils {
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
private static boolean readIsEntirelyInsertion(SAMRecord read) {
private static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() != CigarOperator.INSERTION)
return false;
@ -730,7 +730,7 @@ public class ReadUtils {
}
/**
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
* deletion.
@ -742,7 +742,7 @@ public class ReadUtils {
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
int readCoord = result.getFirst();
@ -760,7 +760,7 @@ public class ReadUtils {
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
* a deletion.
*
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
* behavior to your needs.
*
@ -770,7 +770,7 @@ public class ReadUtils {
*/
@Requires({"refCoord >= getRefCoordSoftUnclippedStart(read)", "refCoord <= getRefCoordSoftUnclippedEnd(read)"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
boolean fallsInsideDeletion = false;
@ -851,13 +851,13 @@ public class ReadUtils {
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
}
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
int newReadStart = rec.getAlignmentStart();
int newReadEnd = rec.getAlignmentEnd();
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
public static GATKSAMRecord unclipSoftClippedBases(GATKSAMRecord read) {
int newReadStart = read.getAlignmentStart();
int newReadEnd = read.getAlignmentEnd();
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(read.getCigar().getCigarElements().size());
int heldOver = -1;
boolean sSeen = false;
for ( CigarElement e : rec.getCigar().getCigarElements() ) {
for ( CigarElement e : read.getCigar().getCigarElements() ) {
if ( e.getOperator().equals(CigarOperator.S) ) {
newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M));
if ( sSeen ) {
@ -872,7 +872,7 @@ public class ReadUtils {
}
// merge duplicate operators together
int idx = 0;
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(read.getCigar().getCigarElements().size());
while ( idx < newCigarElements.size() -1 ) {
if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) {
int combSize = newCigarElements.get(idx).getLength();
@ -889,10 +889,10 @@ public class ReadUtils {
idx++;
}
rec.setCigar(new Cigar(finalCigarElements));
rec.setAlignmentStart(newReadStart);
read.setCigar(new Cigar(finalCigarElements));
read.setAlignmentStart(newReadStart);
return rec;
return read;
}
/**
@ -905,7 +905,7 @@ public class ReadUtils {
@Requires({"read1 != null", "read2 != null"})
@Ensures("result == 0 || result == 1 || result == -1")
public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
AlignmentStartComparator comp = new AlignmentStartComparator();
return comp.compare(read1, read2);
}

View File

@ -1,11 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import net.sf.samtools.SAMRecord;
import java.util.List;
/**
@ -38,7 +37,7 @@ public class AllLocusViewUnitTest extends LocusViewTemplate {
* @param reads
*/
@Override
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<GATKSAMRecord> reads ) {
AllLocusView allLocusView = (AllLocusView)view;
// TODO: Should skip over loci not in the given range.
@ -52,7 +51,7 @@ public class AllLocusViewUnitTest extends LocusViewTemplate {
Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect");
int expectedReadsAtSite = 0;
for( SAMRecord read: reads ) {
for( GATKSAMRecord read: reads ) {
if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) {
Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads");
expectedReadsAtSite++;

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import net.sf.samtools.SAMRecord;
import java.util.List;
/**
@ -41,7 +41,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate {
* @param reads
*/
@Override
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<SAMRecord> reads ) {
protected void testReadsInContext( LocusView view, List<GenomeLoc> range, List<GATKSAMRecord> reads ) {
CoveredLocusView coveredLocusView = (CoveredLocusView)view;
// TODO: Should skip over loci not in the given range.
@ -53,7 +53,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate {
GenomeLoc site = genomeLocParser.createGenomeLoc("chr1",i);
int expectedReadsAtSite = 0;
for( SAMRecord read: reads ) {
for( GATKSAMRecord read: reads ) {
if( genomeLocParser.createGenomeLoc(read).containsP(site) )
expectedReadsAtSite++;
}
@ -67,7 +67,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate {
Assert.assertEquals(locusContext.getLocation(), site, "Target locus context location is incorrect");
Assert.assertEquals(locusContext.getReads().size(), expectedReadsAtSite, "Found wrong number of reads at site");
for( SAMRecord read: reads ) {
for( GATKSAMRecord read: reads ) {
if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation()))
Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads");
}

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -55,12 +56,12 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.<SAMRecord>emptyList());
testReadsInContext(view, shard.getGenomeLocs(), Collections.<GATKSAMRecord>emptyList());
}
@Test
public void singleReadTest() {
SAMRecord read = buildSAMRecord("chr1", 1, 5);
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
@ -76,7 +77,7 @@ public abstract class LocusViewTemplate extends BaseTest {
@Test
public void readCoveringFirstPartTest() {
SAMRecord read = buildSAMRecord("chr1", 1, 5);
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -90,7 +91,7 @@ public abstract class LocusViewTemplate extends BaseTest {
@Test
public void readCoveringLastPartTest() {
SAMRecord read = buildSAMRecord("chr1", 6, 10);
GATKSAMRecord read = buildSAMRecord("read1","chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -104,7 +105,7 @@ public abstract class LocusViewTemplate extends BaseTest {
@Test
public void readCoveringMiddleTest() {
SAMRecord read = buildSAMRecord("chr1", 3, 7);
GATKSAMRecord read = buildSAMRecord("read1","chr1", 3, 7);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -118,7 +119,7 @@ public abstract class LocusViewTemplate extends BaseTest {
@Test
public void readAndLocusOverlapAtLastBase() {
SAMRecord read = buildSAMRecord("chr1", 1, 5);
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));
@ -132,7 +133,7 @@ public abstract class LocusViewTemplate extends BaseTest {
@Test
public void readOverlappingStartTest() {
SAMRecord read = buildSAMRecord("chr1", 1, 10);
GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
@ -146,7 +147,7 @@ public abstract class LocusViewTemplate extends BaseTest {
@Test
public void readOverlappingEndTest() {
SAMRecord read = buildSAMRecord("chr1", 6, 15);
GATKSAMRecord read = buildSAMRecord("read1","chr1", 6, 15);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -160,8 +161,8 @@ public abstract class LocusViewTemplate extends BaseTest {
@Test
public void readsSpanningTest() {
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
SAMRecord read2 = buildSAMRecord("chr1", 6, 10);
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5);
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -170,17 +171,17 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
Collections.addAll(expectedReads, read1, read2);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
public void duplicateReadsTest() {
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
SAMRecord read2 = buildSAMRecord("chr1", 1, 5);
SAMRecord read3 = buildSAMRecord("chr1", 6, 10);
SAMRecord read4 = buildSAMRecord("chr1", 6, 10);
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5);
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 1, 5);
GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 6, 10);
GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -189,17 +190,17 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
public void cascadingReadsWithinBoundsTest() {
SAMRecord read1 = buildSAMRecord("chr1", 2, 6);
SAMRecord read2 = buildSAMRecord("chr1", 3, 7);
SAMRecord read3 = buildSAMRecord("chr1", 4, 8);
SAMRecord read4 = buildSAMRecord("chr1", 5, 9);
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 2, 6);
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 3, 7);
GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 4, 8);
GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 5, 9);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -208,19 +209,19 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
public void cascadingReadsAtBoundsTest() {
SAMRecord read1 = buildSAMRecord("chr1", 1, 5);
SAMRecord read2 = buildSAMRecord("chr1", 2, 6);
SAMRecord read3 = buildSAMRecord("chr1", 3, 7);
SAMRecord read4 = buildSAMRecord("chr1", 4, 8);
SAMRecord read5 = buildSAMRecord("chr1", 5, 9);
SAMRecord read6 = buildSAMRecord("chr1", 6, 10);
GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5);
GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 2, 6);
GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 3, 7);
GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 4, 8);
GATKSAMRecord read5 = buildSAMRecord("read5","chr1", 5, 9);
GATKSAMRecord read6 = buildSAMRecord("read6","chr1", 6, 10);
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
@ -229,25 +230,25 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
}
@Test
public void cascadingReadsOverlappingBoundsTest() {
SAMRecord read01 = buildSAMRecord("chr1", 1, 5);
SAMRecord read02 = buildSAMRecord("chr1", 2, 6);
SAMRecord read03 = buildSAMRecord("chr1", 3, 7);
SAMRecord read04 = buildSAMRecord("chr1", 4, 8);
SAMRecord read05 = buildSAMRecord("chr1", 5, 9);
SAMRecord read06 = buildSAMRecord("chr1", 6, 10);
SAMRecord read07 = buildSAMRecord("chr1", 7, 11);
SAMRecord read08 = buildSAMRecord("chr1", 8, 12);
SAMRecord read09 = buildSAMRecord("chr1", 9, 13);
SAMRecord read10 = buildSAMRecord("chr1", 10, 14);
SAMRecord read11 = buildSAMRecord("chr1", 11, 15);
SAMRecord read12 = buildSAMRecord("chr1", 12, 16);
GATKSAMRecord read01 = buildSAMRecord("read1","chr1", 1, 5);
GATKSAMRecord read02 = buildSAMRecord("read2","chr1", 2, 6);
GATKSAMRecord read03 = buildSAMRecord("read3","chr1", 3, 7);
GATKSAMRecord read04 = buildSAMRecord("read4","chr1", 4, 8);
GATKSAMRecord read05 = buildSAMRecord("read5","chr1", 5, 9);
GATKSAMRecord read06 = buildSAMRecord("read6","chr1", 6, 10);
GATKSAMRecord read07 = buildSAMRecord("read7","chr1", 7, 11);
GATKSAMRecord read08 = buildSAMRecord("read8","chr1", 8, 12);
GATKSAMRecord read09 = buildSAMRecord("read9","chr1", 9, 13);
GATKSAMRecord read10 = buildSAMRecord("read10","chr1", 10, 14);
GATKSAMRecord read11 = buildSAMRecord("read11","chr1", 11, 15);
GATKSAMRecord read12 = buildSAMRecord("read12","chr1", 12, 16);
SAMRecordIterator iterator = new SAMRecordIterator(read01, read02, read03, read04, read05, read06,
read07, read08, read09, read10, read11, read12);
@ -257,7 +258,7 @@ public abstract class LocusViewTemplate extends BaseTest {
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
List<SAMRecord> expectedReads = new ArrayList<SAMRecord>();
List<GATKSAMRecord> expectedReads = new ArrayList<GATKSAMRecord>();
Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06,
read07, read08, read09, read10, read11, read12);
testReadsInContext(view, shard.getGenomeLocs(), expectedReads);
@ -277,7 +278,7 @@ public abstract class LocusViewTemplate extends BaseTest {
* @param bounds
* @param reads
*/
protected abstract void testReadsInContext(LocusView view, List<GenomeLoc> bounds, List<SAMRecord> reads);
protected abstract void testReadsInContext(LocusView view, List<GenomeLoc> bounds, List<GATKSAMRecord> reads);
/**
* Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data.
@ -321,12 +322,13 @@ public abstract class LocusViewTemplate extends BaseTest {
*
* @return New SAM Record
*/
protected SAMRecord buildSAMRecord(String contig, int alignmentStart, int alignmentEnd) {
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
SAMFileHeader header = new SAMFileHeader();
header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary());
SAMRecord record = new SAMRecord(header);
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(readName);
record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig));
record.setAlignmentStart(alignmentStart);
Cigar cigar = new Cigar();

View File

@ -25,13 +25,10 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import com.google.caliper.Param;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
@ -41,9 +38,9 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker;
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
import java.lang.reflect.Field;
import java.util.Collections;
/**
@ -126,7 +123,7 @@ class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
private long Gs;
private long Ts;
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) {
for(byte base: read.getReadBases()) {
switch(base) {
case 'A': As++; break;

View File

@ -7,6 +7,7 @@ import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
@ -38,8 +39,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
}
private final LocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
@Test
@ -212,12 +212,12 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");
if(currentLocus == firstLocus) {
List<SAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
}
else if(currentLocus == secondLocus) {
List<SAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
@ -265,7 +265,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
Assert.assertTrue(li.hasNext(),"Missing first locus at " + firstLocus);
AlignmentContext alignmentContext = li.next();
Assert.assertEquals(alignmentContext.getLocation().getStart(),firstLocus,"Incorrect locus at this position; should be " + firstLocus);
List<SAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + firstLocus);
Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + firstLocus);

View File

@ -10,6 +10,8 @@ import net.sf.samtools.SAMFileHeader;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@ -94,8 +96,8 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
walker.out = writer;
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
SAMRecord ret = walker.map(bases, rec,null);
GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
SAMRecord ret = walker.map(bases, rec, null);
assertTrue(ret == rec);
assertTrue(ret.getReadName().equals(rec.getReadName()));
}

View File

@ -32,7 +32,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_QUALITY_TAG, REDUCED_READ_COUNTS);
}
private void testReadBasesAndQuals(SAMRecord read, int expectedStart, int expectedStop) {
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {
SAMRecord clipped = ReadUtils.hardClipBases(read, expectedStart, expectedStop - 1, null);
String expectedBases = BASES.substring(expectedStart, expectedStop);
String expectedQuals = QUALS.substring(expectedStart, expectedStop);

View File

@ -26,9 +26,9 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -44,7 +44,7 @@ public class ReadClipperUnitTest extends BaseTest {
// TODO: Add error messages on failed tests
SAMRecord read, expected;
GATKSAMRecord read, expected;
ReadClipper readClipper;
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
@ -65,7 +65,7 @@ public class ReadClipperUnitTest extends BaseTest {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new SAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new GATKSAMRecord(read.getHeader()));
//clip 1 base
expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
@ -79,7 +79,7 @@ public class ReadClipperUnitTest extends BaseTest {
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new SAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReadCoordinates(0,0);
@ -112,7 +112,7 @@ public class ReadClipperUnitTest extends BaseTest {
logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new SAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReferenceCoordinates(-1,1);
@ -145,7 +145,7 @@ public class ReadClipperUnitTest extends BaseTest {
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new SAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
@ -166,7 +166,7 @@ public class ReadClipperUnitTest extends BaseTest {
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new SAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
//clip 1 base at end
expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
@ -188,7 +188,7 @@ public class ReadClipperUnitTest extends BaseTest {
//Clip whole read
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new SAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipLowQualEnds((byte)34);

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.utils.fragments;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
@ -52,16 +52,16 @@ public class FragmentUtilsUnitTest extends BaseTest {
boolean leftIsFirst, boolean leftIsNegative) {
super(FragmentUtilsTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative));
List<SAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
SAMRecord left = pair.get(0);
SAMRecord right = pair.get(1);
List<GATKSAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
GATKSAMRecord left = pair.get(0);
GATKSAMRecord right = pair.get(1);
for ( int pos = leftStart; pos < rightStart + readLen; pos++) {
boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd();
boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd();
if ( posCoveredByLeft || posCoveredByRight ) {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
List<Integer> offsets = new ArrayList<Integer>();
if ( posCoveredByLeft ) {
@ -89,9 +89,9 @@ public class FragmentUtilsUnitTest extends BaseTest {
private class TestState {
int expectedSingletons, expectedPairs;
ReadBackedPileup pileup;
List<SAMRecord> rawReads;
List<GATKSAMRecord> rawReads;
private TestState(final int expectedSingletons, final int expectedPairs, final ReadBackedPileup pileup, final List<SAMRecord> rawReads) {
private TestState(final int expectedSingletons, final int expectedPairs, final ReadBackedPileup pileup, final List<GATKSAMRecord> rawReads) {
this.expectedSingletons = expectedSingletons;
this.expectedPairs = expectedPairs;
this.pileup = pileup;
@ -131,7 +131,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "fragmentUtilsTest")
public void testAsListOfReadsFromPileup(FragmentUtilsTest test) {
for ( TestState testState : test.statesForPileup ) {
FragmentCollection<SAMRecord> fp = FragmentUtils.create(testState.pileup.getReads());
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.pileup.getReads());
Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs);
Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons);
}
@ -140,7 +140,7 @@ public class FragmentUtilsUnitTest extends BaseTest {
@Test(enabled = true, dataProvider = "fragmentUtilsTest")
public void testAsListOfReads(FragmentUtilsTest test) {
for ( TestState testState : test.statesForReads ) {
FragmentCollection<SAMRecord> fp = FragmentUtils.create(testState.rawReads);
FragmentCollection<GATKSAMRecord> fp = FragmentUtils.create(testState.rawReads);
Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs);
Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons);
}
@ -148,10 +148,10 @@ public class FragmentUtilsUnitTest extends BaseTest {
@Test(enabled = true, expectedExceptions = IllegalArgumentException.class)
public void testOutOfOrder() {
final List<SAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true);
final SAMRecord left = pair.get(0);
final SAMRecord right = pair.get(1);
final List<SAMRecord> reads = Arrays.asList(right, left); // OUT OF ORDER!
final List<GATKSAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true);
final GATKSAMRecord left = pair.get(0);
final GATKSAMRecord right = pair.get(1);
final List<GATKSAMRecord> reads = Arrays.asList(right, left); // OUT OF ORDER!
final List<Integer> offsets = Arrays.asList(0, 50);
final ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets);
FragmentUtils.create(pileup); // should throw exception

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
@ -50,27 +50,25 @@ public class ReadBackedPileupUnitTest {
header.addReadGroup(readGroupOne);
header.addReadGroup(readGroupTwo);
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
read1.setAttribute("RG",readGroupOne.getId());
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
read2.setAttribute("RG",readGroupTwo.getId());
SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
read3.setAttribute("RG",readGroupOne.getId());
SAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
read4.setAttribute("RG",readGroupTwo.getId());
SAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,10);
GATKSAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,10);
read5.setAttribute("RG",readGroupTwo.getId());
SAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header,"read6",0,1,10);
GATKSAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header,"read6",0,1,10);
read6.setAttribute("RG",readGroupOne.getId());
SAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header,"read7",0,1,10);
GATKSAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header,"read7",0,1,10);
read7.setAttribute("RG",readGroupOne.getId());
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,
Arrays.asList(read1,read2,read3,read4,read5,read6,read7),
Arrays.asList(1,1,1,1,1,1,1));
ReadBackedPileup pileup = new ReadBackedPileupImpl(null, Arrays.asList(read1,read2,read3,read4,read5,read6,read7), Arrays.asList(1,1,1,1,1,1,1));
ReadBackedPileup rg1Pileup = pileup.getPileupForReadGroup("rg1");
List<SAMRecord> rg1Reads = rg1Pileup.getReads();
List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();
Assert.assertEquals(rg1Reads.size(), 4, "Wrong number of reads in read group rg1");
Assert.assertEquals(rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
Assert.assertEquals(rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");
@ -78,7 +76,7 @@ public class ReadBackedPileupUnitTest {
Assert.assertEquals(rg1Reads.get(3), read7, "Read " + read7.getReadName() + " should be in rg1 but isn't");
ReadBackedPileup rg2Pileup = pileup.getPileupForReadGroup("rg2");
List<SAMRecord> rg2Reads = rg2Pileup.getReads();
List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();
Assert.assertEquals(rg2Reads.size(), 3, "Wrong number of reads in read group rg2");
Assert.assertEquals(rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
Assert.assertEquals(rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't");
@ -92,16 +90,16 @@ public class ReadBackedPileupUnitTest {
public void testSplitByNullReadGroups() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,
Arrays.asList(read1,read2,read3),
Arrays.asList(1,1,1));
ReadBackedPileup nullRgPileup = pileup.getPileupForReadGroup(null);
List<SAMRecord> nullRgReads = nullRgPileup.getReads();
List<GATKSAMRecord> nullRgReads = nullRgPileup.getReads();
Assert.assertEquals(nullRgPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group");
Assert.assertEquals(nullRgReads.get(0), read1, "Read " + read1.getReadName() + " should be in null rg but isn't");
Assert.assertEquals(nullRgReads.get(1), read2, "Read " + read2.getReadName() + " should be in null rg but isn't");
@ -125,13 +123,13 @@ public class ReadBackedPileupUnitTest {
header.addReadGroup(readGroupOne);
header.addReadGroup(readGroupTwo);
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
read1.setAttribute("RG",readGroupOne.getId());
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
read2.setAttribute("RG",readGroupTwo.getId());
SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10);
read3.setAttribute("RG",readGroupOne.getId());
SAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10);
read4.setAttribute("RG",readGroupTwo.getId());
ReadBackedPileupImpl sample1Pileup = new ReadBackedPileupImpl(null,
@ -147,14 +145,14 @@ public class ReadBackedPileupUnitTest {
ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1");
List<SAMRecord> rg1Reads = rg1Pileup.getReads();
List<GATKSAMRecord> rg1Reads = rg1Pileup.getReads();
Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1");
Assert.assertEquals(rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't");
Assert.assertEquals(rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't");
ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2");
List<SAMRecord> rg2Reads = rg2Pileup.getReads();
List<GATKSAMRecord> rg2Reads = rg2Pileup.getReads();
Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg2");
Assert.assertEquals(rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't");
@ -175,9 +173,9 @@ public class ReadBackedPileupUnitTest {
header.addReadGroup(readGroupOne);
header.addReadGroup(readGroupTwo);
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10);
read1.setAttribute("RG",readGroupOne.getId());
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
read2.setAttribute("RG",readGroupTwo.getId());
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();