diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java index c6755e878..a342cf932 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.alignment; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; @@ -35,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; @@ -72,12 +72,13 @@ public class AlignmentValidationWalker extends ReadWalker { /** * Aligns a read to the given reference. + * * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param read Read to align. * @return Number of reads aligned by this map (aka 1). */ @Override - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { //logger.info(String.format("examining read %s", read.getReadName())); byte[] bases = read.getReadBases(); diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index 7064e637f..c8554573b 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -92,12 +93,13 @@ public class AlignmentWalker extends ReadWalker { /** * Aligns a read to the given reference. + * * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param read Read to align. * @return Number of alignments found for this read. */ @Override - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { SAMRecord alignedRead = aligner.align(read,header); out.addAlignment(alignedRead); return 1; diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java index 57d92319f..d91b83e7a 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.alignment; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; @@ -34,6 +33,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; import java.util.Iterator; @@ -79,12 +79,13 @@ public class CountBestAlignmentsWalker extends ReadWalker { /** * Aligns a read to the given reference. + * * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param read Read to align. * @return Number of alignments found for this read. */ @Override - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { Iterator alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); if(alignmentIterator.hasNext()) { int numAlignments = alignmentIterator.next().length; diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java index 0161c3ab2..57416d111 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.gatk.contexts; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.List; @@ -130,7 +130,7 @@ public class AlignmentContext implements HasGenomeLocation { */ @Deprecated //todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory - public List getReads() { return ( basePileup.getReads() ); } + public List getReads() { return ( basePileup.getReads() ); } /** * Are there any reads associated with this locus? diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index e92599494..a6731ee18 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Collections; import java.util.List; @@ -132,7 +132,7 @@ public class AllLocusView extends LocusView { * @param site Site at which to create the blank locus context. * @return empty context. */ - private final static List EMPTY_PILEUP_READS = Collections.emptyList(); + private final static List EMPTY_PILEUP_READS = Collections.emptyList(); private final static List EMPTY_PILEUP_OFFSETS = Collections.emptyList(); private AlignmentContext createEmptyLocus( GenomeLoc site ) { return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 2f3652d6a..ee3ea63eb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -45,6 +45,7 @@ import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.util.*; @@ -377,10 +378,7 @@ public class LocusIteratorByState extends LocusIterator { maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength()); } else nInsertions++; - indelPile.add ( new ExtendedEventPileupElement(state.getRead(), - state.getReadEventStartOffset(), - state.getEventLength(), - state.getEventBases()) ); + indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) ); } else { // HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position @@ -402,9 +400,7 @@ public class LocusIteratorByState extends LocusIterator { // we count such reads (with a longer deletion spanning over a deletion at the previous base we are // about to report) only if includeReadsWithDeletionAtLoci is true. size++; - indelPile.add ( new ExtendedEventPileupElement(state.getRead(), - state.getReadOffset()-1, - -1) // length=-1 --> noevent + indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent ); } } @@ -442,12 +438,12 @@ public class LocusIteratorByState extends LocusIterator { continue; } else { //observed_bases++; - pile.add(new PileupElement(state.getRead(), state.getReadOffset())); + pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset())); size++; } } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { size++; - pile.add(new PileupElement(state.getRead(), -1)); + pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1)); nDeletions++; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 046003154..3f349d86d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; @@ -57,9 +58,9 @@ public class TraverseDuplicates extends TraversalEngine readsAtLoc(final SAMRecord read, PushbackIterator iter) { + private List readsAtLoc(final GATKSAMRecord read, PushbackIterator iter) { GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); - ArrayList l = new ArrayList(); + ArrayList l = new ArrayList(); l.add(read); for (SAMRecord read2 : iter) { @@ -70,7 +71,7 @@ public class TraverseDuplicates extends TraversalEngine extends TraversalEngine> uniqueReadSets(List reads) { - Set> readSets = new LinkedHashSet>(); + protected Set> uniqueReadSets(List reads) { + Set> readSets = new LinkedHashSet>(); // for each read, find duplicates, and either add the read to its duplicate list or start a new one - for ( SAMRecord read : reads ) { - List readSet = findDuplicateReads(read, readSets); + for ( GATKSAMRecord read : reads ) { + List readSet = findDuplicateReads(read, readSets); if ( readSet == null ) { - readSets.add(new ArrayList(Arrays.asList(read))); // copy so I can add to the list + readSets.add(new ArrayList(Arrays.asList(read))); // copy so I can add to the list } else { readSet.add(read); } @@ -110,13 +111,13 @@ public class TraverseDuplicates extends TraversalEngine findDuplicateReads(SAMRecord read, Set> readSets ) { + protected List findDuplicateReads(GATKSAMRecord read, Set> readSets ) { if ( read.getReadPairedFlag() ) { // paired final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); - for (List reads : readSets) { - SAMRecord key = reads.get(0); + for (List reads : readSets) { + GATKSAMRecord key = reads.get(0); // read and key start at the same place, and either the this read and the key // share a mate location or the read is flagged as a duplicate @@ -131,8 +132,8 @@ public class TraverseDuplicates extends TraversalEngine reads : readSets) { - SAMRecord key = reads.get(0); + for (List reads : readSets) { + GATKSAMRecord key = reads.get(0); boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength(); //System.out.printf("%s %s %b %b %d %d %d %d => %b%n", // read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(), @@ -179,7 +180,7 @@ public class TraverseDuplicates extends TraversalEngine> readSets = uniqueReadSets(readsAtLoc(read, iter)); + Set> readSets = uniqueReadSets(readsAtLoc((GATKSAMRecord) read, iter)); if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size())); // Jump forward in the reference to this locus location diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 27bbd50d5..24b8ac986 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /* * Copyright (c) 2009 The Broad Institute @@ -100,9 +101,9 @@ public class TraverseReads extends TraversalEngine,Read // if the read is mapped, create a metadata tracker ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null; - final boolean keepMeP = walker.filter(refContext, read); + final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read); if (keepMeP) { - M x = walker.map(refContext, read, tracker); // the tracker can be null + M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null sum = walker.reduce(x, sum); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index 6989f45b2..d1148cbd5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFileFactory; -import net.sf.samtools.SAMRecord; import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; @@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingOp; import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; @@ -292,11 +292,12 @@ public class ClipReadsWalker extends ReadWalker strandAwarePositions(SAMRecord read, int start, int stop) { + private Pair strandAwarePositions(GATKSAMRecord read, int start, int stop) { if (read.getReadNegativeStrandFlag()) return new Pair(read.getReadLength() - stop - 1, read.getReadLength() - start - 1); else @@ -374,7 +375,7 @@ public class ClipReadsWalker extends ReadWalker p : cyclesToClip) { // iterate over each cycle range @@ -416,7 +417,7 @@ public class ClipReadsWalker extends ReadWalker clipSeqs) { + public ReadClipperWithData(GATKSAMRecord read, List clipSeqs) { super(read); data = new ClippingData(clipSeqs); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java index e2db1dc52..905ecf273 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.List; import java.util.Set; @@ -20,11 +20,11 @@ import java.util.Set; @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class}) public abstract class DuplicateWalker extends Walker { // Do we actually want to operate on the context? - public boolean filter(GenomeLoc loc, AlignmentContext context, Set> readSets ) { + public boolean filter(GenomeLoc loc, AlignmentContext context, Set> readSets ) { return true; // We are keeping all the reads } - public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set> readSets ); + public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set> readSets ); // Given result of map function public abstract ReduceType reduceInit(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java index 56287df31..7f9269725 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FindReadsWithNamesWalker.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; @@ -71,21 +72,23 @@ public class FindReadsWithNamesWalker extends ReadWalker { private FlagStat myStat = new FlagStat(); - public Integer map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { + public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { myStat.readCount++; if (read.getReadFailsVendorQualityCheckFlag()) { myStat.QC_failure++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 4f072e88c..ac69738d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -40,6 +40,7 @@ import java.util.TreeSet; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file. @@ -136,11 +137,12 @@ public class PrintReadsWalker extends ReadWalker { /** * The reads filter function. + * * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return true if the read passes the filter, false if it doesn't */ - public boolean filter(ReferenceContext ref, SAMRecord read) { + public boolean filter(ReferenceContext ref, GATKSAMRecord read) { // check the read group if ( readGroup != null ) { SAMReadGroupRecord myReadGroup = read.getReadGroup(); @@ -180,11 +182,12 @@ public class PrintReadsWalker extends ReadWalker { /** * The reads map function. + * * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return the read itself */ - public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { + public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { return read; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index 35f04abfc..8933bd73e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * Created by IntelliJ IDEA. @@ -20,11 +21,11 @@ public abstract class ReadWalker extends Walker { } - public boolean filter(ReferenceContext ref, SAMRecord read) { + public boolean filter(ReferenceContext ref, GATKSAMRecord read) { return ( !read.getReadPairedFlag() || read.getReadPairedFlag() && read.getFirstOfPairFlag()); } @Override - public Integer map(ReferenceContext referenceContext, SAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) { + public Integer map(ReferenceContext referenceContext, GATKSAMRecord samRecord, ReadMetaDataTracker readMetaDataTracker) { GATKReportTable table = report.getTable("ReadLengthDistribution"); int length = Math.abs(samRecord.getReadLength()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java index adb7c4c38..3dd51fa7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/ConstrainedMateFixingManager.java @@ -9,6 +9,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; @@ -180,8 +181,8 @@ public class ConstrainedMateFixingManager { addRead(newRead, readWasModified, true); } - public void addReads(List newReads, Set modifiedReads) { - for ( SAMRecord newRead : newReads ) + public void addReads(List newReads, Set modifiedReads) { + for ( GATKSAMRecord newRead : newReads ) addRead(newRead, modifiedReads.contains(newRead), false); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 85d151c19..60b1f8a88 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -47,6 +47,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.NWaySAMFileWriter; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils; @@ -281,10 +282,10 @@ public class IndelRealigner extends ReadWalker { // the reads and known indels that fall into the current interval private final ReadBin readsToClean = new ReadBin(); - private final ArrayList readsNotToClean = new ArrayList(); + private final ArrayList readsNotToClean = new ArrayList(); private final ArrayList knownIndelsToTry = new ArrayList(); private final HashSet indelRodsSeen = new HashSet(); - private final HashSet readsActuallyCleaned = new HashSet(); + private final HashSet readsActuallyCleaned = new HashSet(); private static final int MAX_QUAL = 99; @@ -469,7 +470,7 @@ public class IndelRealigner extends ReadWalker { readsActuallyCleaned.clear(); } - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { if ( currentInterval == null ) { emit(read); return 0; @@ -535,7 +536,7 @@ public class IndelRealigner extends ReadWalker { // TODO -- it would be nice if we could use indels from 454 reads as alternate consenses } - private void cleanAndCallMap(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) { + private void cleanAndCallMap(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker, GenomeLoc readLoc) { if ( readsToClean.size() > 0 ) { GenomeLoc earliestPossibleMove = getToolkit().getGenomeLocParser().createGenomeLoc(readsToClean.getReads().get(0)); if ( manager.canMoveReads(earliestPossibleMove) ) @@ -656,14 +657,14 @@ public class IndelRealigner extends ReadWalker { private void clean(ReadBin readsToClean) { - final List reads = readsToClean.getReads(); + final List reads = readsToClean.getReads(); if ( reads.size() == 0 ) return; byte[] reference = readsToClean.getReference(referenceReader); int leftmostIndex = readsToClean.getLocation().getStart(); - final ArrayList refReads = new ArrayList(); // reads that perfectly match ref + final ArrayList refReads = new ArrayList(); // reads that perfectly match ref final ArrayList altReads = new ArrayList(); // reads that don't perfectly match final LinkedList altAlignmentsToTest = new LinkedList(); // should we try to make an alt consensus from the read? final Set altConsenses = new LinkedHashSet(); // list of alt consenses @@ -815,7 +816,7 @@ public class IndelRealigner extends ReadWalker { // however we don't have enough info to use the proper MAQ scoring system. // For now, we will just arbitrarily add 10 to the mapping quality. [EB, 6/7/2010]. // TODO -- we need a better solution here - SAMRecord read = aRead.getRead(); + GATKSAMRecord read = aRead.getRead(); read.setMappingQuality(Math.min(aRead.getRead().getMappingQuality() + 10, 254)); // before we fix the attribute tags we first need to make sure we have enough of the reference sequence @@ -874,8 +875,8 @@ public class IndelRealigner extends ReadWalker { } } - private long determineReadsThatNeedCleaning(final List reads, - final ArrayList refReadsToPopulate, + private long determineReadsThatNeedCleaning(final List reads, + final ArrayList refReadsToPopulate, final ArrayList altReadsToPopulate, final LinkedList altAlignmentsToTest, final Set altConsenses, @@ -884,7 +885,7 @@ public class IndelRealigner extends ReadWalker { long totalRawMismatchSum = 0L; - for ( final SAMRecord read : reads ) { + for ( final GATKSAMRecord read : reads ) { // we can not deal with screwy records if ( read.getCigar().numCigarElements() == 0 ) { @@ -1372,7 +1373,7 @@ public class IndelRealigner extends ReadWalker { } private class AlignedRead { - private final SAMRecord read; + private final GATKSAMRecord read; private byte[] readBases = null; private byte[] baseQuals = null; private Cigar newCigar = null; @@ -1380,12 +1381,12 @@ public class IndelRealigner extends ReadWalker { private int mismatchScoreToReference = 0; private long alignerMismatchScore = 0; - public AlignedRead(SAMRecord read) { + public AlignedRead(GATKSAMRecord read) { this.read = read; mismatchScoreToReference = 0; } - public SAMRecord getRead() { + public GATKSAMRecord getRead() { return read; } @@ -1569,7 +1570,7 @@ public class IndelRealigner extends ReadWalker { private class ReadBin implements HasGenomeLocation { - private final ArrayList reads = new ArrayList(); + private final ArrayList reads = new ArrayList(); private byte[] reference = null; private GenomeLoc loc = null; @@ -1577,7 +1578,7 @@ public class IndelRealigner extends ReadWalker { // Return false if we can't process this read bin because the reads are not correctly overlapping. // This can happen if e.g. there's a large known indel with no overlapping reads. - public void add(SAMRecord read) { + public void add(GATKSAMRecord read) { GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read); if ( loc == null ) @@ -1588,7 +1589,7 @@ public class IndelRealigner extends ReadWalker { reads.add(read); } - public List getReads() { return reads; } + public List getReads() { return reads; } public byte[] getReference(IndexedFastaSequenceFile referenceReader) { // set up the reference if we haven't done so yet diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java index 17d5a8e9b..7490262f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/LeftAlignIndels.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** @@ -88,7 +89,7 @@ public class LeftAlignIndels extends ReadWalker { writer.addAlignment(read); } - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { // we can not deal with screwy records if ( read.getCigar().numCigarElements() == 0 ) { emit(read); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 50ec9fe74..414ffa09c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator; import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -394,7 +395,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { @Override - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { // if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT READ"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java index 24c06d101..dbbd8e761 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.samples.Gender; @@ -32,6 +31,7 @@ import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. @@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires; */ @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountMalesWalker extends ReadWalker { - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) { Sample sample = getSampleDB().getSample(read); return sample.getGender() == Gender.MALE ? 1 : 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java index 9ce9c4eec..b5a2d183f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. @@ -38,7 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires; */ @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountReadsWalker extends ReadWalker { - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) { return 1; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java index b5f5442cd..1cb1579d0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CycleQualityWalker.java @@ -1,434 +1,434 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.qc; - -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.utils.collections.PrimitivePair; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; - -import java.io.*; -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Apr 9, 2010 - * Time: 12:16:41 PM - * To change this template use File | Settings | File Templates. - */ - -/** - * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. - * Can also count the number of reads matching a given criterion using read filters (see the - * --read-filter command line argument). Simplest example of a read-backed analysis. - */ -@Requires({DataSource.READS}) -public class CycleQualityWalker extends ReadWalker { - @Output - protected PrintStream out; - - @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+ - "on mapped reads only, while unmapped reads will be discarded", required=false) - protected boolean MAPPED_ONLY = true; - @Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false) - protected int MAX_READ_LENGTH = 500; - @Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true) - protected String PREFIX = null; -// @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false) - protected boolean HTML = false; - @Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false) - protected double QTHRESHOLD = 10.0; - @Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+ - "original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.") - protected boolean ASSESS_BOTH_QUALS = false; - - private Map cyclesByLaneMap = null; - private Map cyclesByLibraryMap = null; - private Map cyclesByLaneMapOrig = null; - private Map cyclesByLibraryMapOrig = null; - - public void initialize() { - if ( PREFIX == null ) throw new ReviewedStingException("Prefix for output file(s) must be specified"); - cyclesByLaneMap = new HashMap(); - cyclesByLibraryMap = new HashMap(); - cyclesByLaneMapOrig = new HashMap(); - cyclesByLibraryMapOrig = new HashMap(); - } - - - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { - - if ( AlignmentUtils.isReadUnmapped(read) ) return 0; - - SAMReadGroupRecord rg = read.getReadGroup(); - - if ( rg == null ) throw new UserException.ReadMissingReadGroup(read); - - String lane = read.getReadGroup().getPlatformUnit(); - String library = read.getReadGroup().getLibrary(); - - if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information"); - if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information"); - - int end = 0; - - if ( read.getReadPairedFlag() ) { - - if ( read.getFirstOfPairFlag() ) { - if ( read.getSecondOfPairFlag() ) - throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes"); - end = 1; - } else { - if ( ! read.getSecondOfPairFlag() ) - throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes"); - end = 2; - } - } - - CycleStats[] byLane = cyclesByLaneMap.get(lane); - CycleStats[] byLib = cyclesByLibraryMap.get(library); - - //byte [] quals = USE_ORIGINAL_QUALS ? AlignmentUtils.getOriginalQualsInCycleOrder(read) : AlignmentUtils.getQualsInCycleOrder(read); - - byte [] quals = AlignmentUtils.getQualsInCycleOrder(read); - - // if end == 0 (single end lane), we allocate array of length 1, otherwise we need two - // elements in the array in order to be able to collect statistics for each end in the pair independently - if ( byLane == null ) cyclesByLaneMap.put(lane,byLane = new CycleStats[(end==0?1:2)]); - if ( byLib == null ) cyclesByLibraryMap.put(library, byLib =new CycleStats[2]); - - if ( end != 0 ) end--; // we will now use 'end' as index into the array of stats - - if ( byLane[end] == null ) byLane[end] = new CycleStats(MAX_READ_LENGTH); - if ( byLib[end] == null ) byLib[end] =new CycleStats(MAX_READ_LENGTH); - byLane[end].add(quals); - byLib[end].add(quals); - - return 1; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ - public Integer reduceInit() { - return 0; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return accumulator with result of the map taken into account. - */ - public Integer reduce(Integer value, Integer sum) { - return sum.intValue()+value.intValue(); //To change body of implemented methods use File | Settings | File Templates. - } - - public void onTraversalDone(Integer result) { - if ( HTML ) { - out.println("

Cycle Quality QC

\n"); - out.println("File(s) analyzed:
"); - for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"
"); - out.println("
"); - } - if ( HTML ) out.println("

"); - out.println("\n"+result+" reads analyzed\n"); - if ( HTML ) out.println("

"); - out.println("by platform unit:"); - if ( HTML ) out.println("
"); - report2(cyclesByLaneMap, new File(PREFIX+".byLane.txt"),true); - out.println(); - if ( HTML ) out.println("

"); - out.println("\nby library:"); - if ( HTML ) out.println("
"); - report2(cyclesByLibraryMap, new File(PREFIX+".byLibrary.txt"),true); - out.println(); - if ( HTML ) out.println("

"); - } - - - - private void report2(Map m, File f,boolean summaryReport) { - long totalReads_1 =0; - long totalReads_2 =0; - long totalReads_unpaired = 0; - SortedSet columns = new TreeSet(); - int maxLength = 0; // maximum read length across all lanes/read ends analyzed - - for( Map.Entry e : m.entrySet() ) { - if ( e.getValue()[0].getMaxReadLength() > maxLength ) maxLength = e.getValue()[0].getMaxReadLength(); - - if ( e.getValue().length == 1 || e.getValue().length == 2 && e.getValue()[1] == null ) { - totalReads_unpaired += e.getValue()[0].getReadCount(); // single end lane - } else { - totalReads_1 += e.getValue()[0].getReadCount(); - totalReads_2 += e.getValue()[1].getReadCount(); - if ( e.getValue()[1].getMaxReadLength() > maxLength ) maxLength = e.getValue()[1].getMaxReadLength(); - } - - columns.add(e.getKey()); - } - - if ( summaryReport ) { - if ( totalReads_1 == 0 && totalReads_2 != 0) { - out.println(" End 1: No reads"); - if ( HTML ) out.println("
"); - } - if ( totalReads_2 == 0 && totalReads_1 != 0 ) { - out.println(" End 2: No reads"); - if ( HTML ) out.println("
"); - } - if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) { - out.println(" No reads found."); - if ( HTML ) out.println("
"); - } - } - - if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) return; - - try { - BufferedWriter w = new BufferedWriter(new FileWriter(f)); - - w.write("cycle"); - - for( String col : columns ) { - CycleStats[] data = m.get(col); - if ( summaryReport ) { - out.print(" "); - out.print(col); - } - - CycleStats end1 = data[0]; - int minL = ( end1 == null ? 0 : end1.getMinReadLength() ); - int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() ); - - if ( data.length == 2 && data[1] != null ) { - if ( summaryReport ) { - out.println(": paired"); - if ( HTML ) out.println("
"); - out.println(" Reads analyzed:"); - if ( HTML ) out.println("
"); - } - CycleStats end2 = data[1]; - - out.print( " End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) ); - if ( minL == maxL ) out.println("; read length = "+minL); - else out.println("; WARNING: variable read length = "+minL+"-"+maxL); - if ( HTML ) out.println("
"); - - out.print( " End 2: "+ ( end2 == null ? 0 : end2.getReadCount()) ); - minL = ( end2 == null ? 0 : end2.getMinReadLength() ); - maxL = ( end2 == null ? 0 : end2.getMaxReadLength() ); - if ( minL == maxL ) out.println("; read length = "+minL); - else out.println("; WARNING: variable read length = "+minL+"-"+maxL); - if ( HTML ) out.println("
"); - } - else { - out.println(": unpaired"); - if ( HTML ) out.println("
"); - out.print( " Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) ); - if ( minL == maxL ) out.println("; read length = "+minL); - else out.println("; WARNING: variable read length = "+minL+"-"+maxL); - if ( HTML ) out.println("
"); - } - - w.write('\t') ; - w.write(col); - if ( data.length == 1 || data.length == 2 && data[1] == null ) { - w.write(".unpaired"); - w.write('\t'); - w.write(col); - w.write(".unpaired.stddev"); - } else { - w.write(".end1"); - w.write('\t'); - w.write(col); - w.write(".end1.stddev"); - w.write('\t') ; - w.write(col); - w.write(".end2"); - w.write('\t'); - w.write(col); - w.write(".end2.stddev"); - } - } - - w.write('\n'); - - int cycle = 0; - - Map> problems = new HashMap>(); - - while ( cycle < maxLength ) { - w.write(Integer.toString(cycle+1)); - for ( String col : columns ) { - - CycleStats[] data = m.get(col); - CycleStats end1 = data[0]; - w.write('\t'); - if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t."); - else { - double aq = end1.getCycleQualAverage(cycle); - w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle))); - recordProblem(aq,cycle, problems,col+".End1"); - } - if ( data.length > 1 && data[1] != null ) { - w.write('\t'); - CycleStats end2 = data[1]; - if ( end2 == null || cycle >= end2.getMaxReadLength() ) w.write(".\t."); - else { - double aq = end2.getCycleQualAverage(cycle); - w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle))); - recordProblem(aq,cycle, problems,col+".End2"); - } - } - } - w.write('\n'); - cycle++; - } - w.close(); - - if ( HTML ) out.println("
"); - - if ( HTML ) out.println("
"); - out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):"); - if ( HTML ) out.println("
"); - for ( String col : columns ) { - List lp = problems.get(col+".End1"); - out.print(" "+col+" End1:"); - if ( lp == null ) { - out.print(" GOOD"); - } else { - for ( PrimitivePair.Int p : lp ) { - out.print(" "+(p.first+1)+"-"); - if ( p.second >= 0 ) out.print((p.second+1)); - else out.print("END"); - } - } - out.println(); - if ( HTML ) out.println("
"); - - lp = problems.get(col+".End2"); - out.print(" "+col+" End2:"); - if ( lp == null ) { - out.print(" GOOD"); - } else { - for ( PrimitivePair.Int p : lp ) { - out.print(" "+(p.first+1)+"-"); - if ( p.second >= 0 ) out.print(p.second); - else out.print("END"); - } - } - out.println(); - if ( HTML ) out.println("
"); - } - - } catch (IOException ioe) { - throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe); - } - } - - - private void recordProblem(double q, int cycle, Map> problems, String name) { - - PrimitivePair.Int p = null; - List lp = null; - if ( q < QTHRESHOLD ) { // there is a problem - if ( ! problems.containsKey(name) ) { - lp = new ArrayList(); - p = new PrimitivePair.Int(cycle,-1); - lp.add(p); - problems.put(name,lp); - } else { - lp = problems.get(name); - p = lp.get(lp.size()-1); - } - if ( p.second != -1 ) { // if we are not already inside a run of bad qual bases - lp.add(new PrimitivePair.Int(cycle,-1)); // start new run - } - } else { // good base - if ( problems.containsKey(name) ) { // only if we had problem intervals at all, we need to check if the last one needs to be closed - lp = problems.get(name); - p = lp.get(lp.size()-1); - if ( p.second == -1 ) p.second = cycle - 1; - } - } - } - - - static class CycleStats { - private long readCount = 0; - private double[] cycleQualsAv = null; - private double[] cycleQualsSd = null; - private int minL = 1000000000; // read min. length - private int maxL = 0; // read max. length - - public CycleStats(int N) { - readCount = 0; - cycleQualsAv = new double[N]; - cycleQualsSd = new double[N]; - } - - public void add(byte[] quals) { - if ( quals.length > cycleQualsAv.length ) - throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length"); - if ( quals.length > maxL ) maxL = quals.length; - if ( quals.length < minL ) minL = quals.length; - readCount++; - for ( int i = 0 ; i < quals.length ; i++ ) { - // NOTE: in the update equaltions below, there is no need to check if readCount == 1 (i.e. - // we are initializing with the very first record) or not. Indeed, the arrays are initialized with - // 0; when the very first value arrives, readCount is 1 and cycleQuals[i] gets set to quals[i] (correct!); - // this will also make the second term in the update equation for Sd (quals[i]-cycleQualsAv[i]) equal - // to 0, so Sd will be initially set to 0. - double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance - cycleQualsAv[i] += ( quals[i] - cycleQualsAv[i] ) / readCount; // update mean - cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] ); - } - } - - public long getReadCount() { return readCount; } - public int getMaxReadLength() { return maxL; } - public int getMinReadLength() { return minL; } -// long [] getCycleQualSums() { return cycleQuals; } -// long getCycleQualSum(int i) { return cycleQuals[i]; } - double getCycleQualAverage(int i) { return cycleQualsAv[i]; } - double getCycleQualStdDev(int i) { return Math.sqrt( cycleQualsSd[i]/(readCount-1) ); } - } -} +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.collections.PrimitivePair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.io.*; +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: asivache + * Date: Apr 9, 2010 + * Time: 12:16:41 PM + * To change this template use File | Settings | File Templates. + */ + +/** + * Walks over the input data set, calculating the number of reads seen for diagnostic purposes. + * Can also count the number of reads matching a given criterion using read filters (see the + * --read-filter command line argument). Simplest example of a read-backed analysis. + */ +@Requires({DataSource.READS}) +public class CycleQualityWalker extends ReadWalker { + @Output + protected PrintStream out; + + @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+ + "on mapped reads only, while unmapped reads will be discarded", required=false) + protected boolean MAPPED_ONLY = true; + @Argument(fullName="maxReadLength", shortName="rl", doc="maximum read length", required=false) + protected int MAX_READ_LENGTH = 500; + @Argument(fullName="out_prefix",shortName="p",doc="prefix for output report and statistics files",required=true) + protected String PREFIX = null; +// @Argument(fullName="html",shortName="html",doc="produce html-formatted output (starting with h3-level tags) rather than plain text",required=false) + protected boolean HTML = false; + @Argument(fullName="qualThreshold", shortName="Q",doc="flag as problematic all cycles with av. qualities below the threshold (applies only to the generated report)",required=false) + protected double QTHRESHOLD = 10.0; + @Argument(fullName="useBothQualities",shortName="bothQ",required=false,doc="Generate statistics both for currently set and for "+ + "original base qualities (OQ tag, must be present in the bam); two separate data files will be generated.") + protected boolean ASSESS_BOTH_QUALS = false; + + private Map cyclesByLaneMap = null; + private Map cyclesByLibraryMap = null; + private Map cyclesByLaneMapOrig = null; + private Map cyclesByLibraryMapOrig = null; + + public void initialize() { + if ( PREFIX == null ) throw new ReviewedStingException("Prefix for output file(s) must be specified"); + cyclesByLaneMap = new HashMap(); + cyclesByLibraryMap = new HashMap(); + cyclesByLaneMapOrig = new HashMap(); + cyclesByLibraryMapOrig = new HashMap(); + } + + + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { + + if ( AlignmentUtils.isReadUnmapped(read) ) return 0; + + SAMReadGroupRecord rg = read.getReadGroup(); + + if ( rg == null ) throw new UserException.ReadMissingReadGroup(read); + + String lane = read.getReadGroup().getPlatformUnit(); + String library = read.getReadGroup().getLibrary(); + + if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information"); + if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information"); + + int end = 0; + + if ( read.getReadPairedFlag() ) { + + if ( read.getFirstOfPairFlag() ) { + if ( read.getSecondOfPairFlag() ) + throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes"); + end = 1; + } else { + if ( ! read.getSecondOfPairFlag() ) + throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes"); + end = 2; + } + } + + CycleStats[] byLane = cyclesByLaneMap.get(lane); + CycleStats[] byLib = cyclesByLibraryMap.get(library); + + //byte [] quals = USE_ORIGINAL_QUALS ? AlignmentUtils.getOriginalQualsInCycleOrder(read) : AlignmentUtils.getQualsInCycleOrder(read); + + byte [] quals = AlignmentUtils.getQualsInCycleOrder(read); + + // if end == 0 (single end lane), we allocate array of length 1, otherwise we need two + // elements in the array in order to be able to collect statistics for each end in the pair independently + if ( byLane == null ) cyclesByLaneMap.put(lane,byLane = new CycleStats[(end==0?1:2)]); + if ( byLib == null ) cyclesByLibraryMap.put(library, byLib =new CycleStats[2]); + + if ( end != 0 ) end--; // we will now use 'end' as index into the array of stats + + if ( byLane[end] == null ) byLane[end] = new CycleStats(MAX_READ_LENGTH); + if ( byLib[end] == null ) byLib[end] =new CycleStats(MAX_READ_LENGTH); + byLane[end].add(quals); + byLib[end].add(quals); + + return 1; //To change body of implemented methods use File | Settings | File Templates. + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + public Integer reduceInit() { + return 0; //To change body of implemented methods use File | Settings | File Templates. + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * @return accumulator with result of the map taken into account. + */ + public Integer reduce(Integer value, Integer sum) { + return sum.intValue()+value.intValue(); //To change body of implemented methods use File | Settings | File Templates. + } + + public void onTraversalDone(Integer result) { + if ( HTML ) { + out.println("

Cycle Quality QC

\n"); + out.println("File(s) analyzed:
"); + for ( String fileName : getToolkit().getArguments().samFiles) out.println(fileName+"
"); + out.println("
"); + } + if ( HTML ) out.println("

"); + out.println("\n"+result+" reads analyzed\n"); + if ( HTML ) out.println("

"); + out.println("by platform unit:"); + if ( HTML ) out.println("
"); + report2(cyclesByLaneMap, new File(PREFIX+".byLane.txt"),true); + out.println(); + if ( HTML ) out.println("

"); + out.println("\nby library:"); + if ( HTML ) out.println("
"); + report2(cyclesByLibraryMap, new File(PREFIX+".byLibrary.txt"),true); + out.println(); + if ( HTML ) out.println("

"); + } + + + + private void report2(Map m, File f,boolean summaryReport) { + long totalReads_1 =0; + long totalReads_2 =0; + long totalReads_unpaired = 0; + SortedSet columns = new TreeSet(); + int maxLength = 0; // maximum read length across all lanes/read ends analyzed + + for( Map.Entry e : m.entrySet() ) { + if ( e.getValue()[0].getMaxReadLength() > maxLength ) maxLength = e.getValue()[0].getMaxReadLength(); + + if ( e.getValue().length == 1 || e.getValue().length == 2 && e.getValue()[1] == null ) { + totalReads_unpaired += e.getValue()[0].getReadCount(); // single end lane + } else { + totalReads_1 += e.getValue()[0].getReadCount(); + totalReads_2 += e.getValue()[1].getReadCount(); + if ( e.getValue()[1].getMaxReadLength() > maxLength ) maxLength = e.getValue()[1].getMaxReadLength(); + } + + columns.add(e.getKey()); + } + + if ( summaryReport ) { + if ( totalReads_1 == 0 && totalReads_2 != 0) { + out.println(" End 1: No reads"); + if ( HTML ) out.println("
"); + } + if ( totalReads_2 == 0 && totalReads_1 != 0 ) { + out.println(" End 2: No reads"); + if ( HTML ) out.println("
"); + } + if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) { + out.println(" No reads found."); + if ( HTML ) out.println("
"); + } + } + + if ( totalReads_1 == 0 && totalReads_2 == 0 && totalReads_unpaired == 0 ) return; + + try { + BufferedWriter w = new BufferedWriter(new FileWriter(f)); + + w.write("cycle"); + + for( String col : columns ) { + CycleStats[] data = m.get(col); + if ( summaryReport ) { + out.print(" "); + out.print(col); + } + + CycleStats end1 = data[0]; + int minL = ( end1 == null ? 0 : end1.getMinReadLength() ); + int maxL = ( end1 == null ? 0 : end1.getMaxReadLength() ); + + if ( data.length == 2 && data[1] != null ) { + if ( summaryReport ) { + out.println(": paired"); + if ( HTML ) out.println("
"); + out.println(" Reads analyzed:"); + if ( HTML ) out.println("
"); + } + CycleStats end2 = data[1]; + + out.print( " End 1: "+ ( end1 == null ? 0 : end1.getReadCount()) ); + if ( minL == maxL ) out.println("; read length = "+minL); + else out.println("; WARNING: variable read length = "+minL+"-"+maxL); + if ( HTML ) out.println("
"); + + out.print( " End 2: "+ ( end2 == null ? 0 : end2.getReadCount()) ); + minL = ( end2 == null ? 0 : end2.getMinReadLength() ); + maxL = ( end2 == null ? 0 : end2.getMaxReadLength() ); + if ( minL == maxL ) out.println("; read length = "+minL); + else out.println("; WARNING: variable read length = "+minL+"-"+maxL); + if ( HTML ) out.println("
"); + } + else { + out.println(": unpaired"); + if ( HTML ) out.println("
"); + out.print( " Reads analyzed: "+ ( end1 == null ? 0 : end1.getReadCount()) ); + if ( minL == maxL ) out.println("; read length = "+minL); + else out.println("; WARNING: variable read length = "+minL+"-"+maxL); + if ( HTML ) out.println("
"); + } + + w.write('\t') ; + w.write(col); + if ( data.length == 1 || data.length == 2 && data[1] == null ) { + w.write(".unpaired"); + w.write('\t'); + w.write(col); + w.write(".unpaired.stddev"); + } else { + w.write(".end1"); + w.write('\t'); + w.write(col); + w.write(".end1.stddev"); + w.write('\t') ; + w.write(col); + w.write(".end2"); + w.write('\t'); + w.write(col); + w.write(".end2.stddev"); + } + } + + w.write('\n'); + + int cycle = 0; + + Map> problems = new HashMap>(); + + while ( cycle < maxLength ) { + w.write(Integer.toString(cycle+1)); + for ( String col : columns ) { + + CycleStats[] data = m.get(col); + CycleStats end1 = data[0]; + w.write('\t'); + if ( end1 == null || cycle >= end1.getMaxReadLength() ) w.write(".\t."); + else { + double aq = end1.getCycleQualAverage(cycle); + w.write(String.format("%.4f\t%.4f",aq,end1.getCycleQualStdDev(cycle))); + recordProblem(aq,cycle, problems,col+".End1"); + } + if ( data.length > 1 && data[1] != null ) { + w.write('\t'); + CycleStats end2 = data[1]; + if ( end2 == null || cycle >= end2.getMaxReadLength() ) w.write(".\t."); + else { + double aq = end2.getCycleQualAverage(cycle); + w.write(String.format("%.4f\t%.4f",aq,end2.getCycleQualStdDev(cycle))); + recordProblem(aq,cycle, problems,col+".End2"); + } + } + } + w.write('\n'); + cycle++; + } + w.close(); + + if ( HTML ) out.println("
"); + + if ( HTML ) out.println("
"); + out.println("\nOUTCOME (threshold at Q="+QTHRESHOLD+"):"); + if ( HTML ) out.println("
"); + for ( String col : columns ) { + List lp = problems.get(col+".End1"); + out.print(" "+col+" End1:"); + if ( lp == null ) { + out.print(" GOOD"); + } else { + for ( PrimitivePair.Int p : lp ) { + out.print(" "+(p.first+1)+"-"); + if ( p.second >= 0 ) out.print((p.second+1)); + else out.print("END"); + } + } + out.println(); + if ( HTML ) out.println("
"); + + lp = problems.get(col+".End2"); + out.print(" "+col+" End2:"); + if ( lp == null ) { + out.print(" GOOD"); + } else { + for ( PrimitivePair.Int p : lp ) { + out.print(" "+(p.first+1)+"-"); + if ( p.second >= 0 ) out.print(p.second); + else out.print("END"); + } + } + out.println(); + if ( HTML ) out.println("
"); + } + + } catch (IOException ioe) { + throw new UserException.CouldNotCreateOutputFile(f, "Failed to write report", ioe); + } + } + + + private void recordProblem(double q, int cycle, Map> problems, String name) { + + PrimitivePair.Int p = null; + List lp = null; + if ( q < QTHRESHOLD ) { // there is a problem + if ( ! problems.containsKey(name) ) { + lp = new ArrayList(); + p = new PrimitivePair.Int(cycle,-1); + lp.add(p); + problems.put(name,lp); + } else { + lp = problems.get(name); + p = lp.get(lp.size()-1); + } + if ( p.second != -1 ) { // if we are not already inside a run of bad qual bases + lp.add(new PrimitivePair.Int(cycle,-1)); // start new run + } + } else { // good base + if ( problems.containsKey(name) ) { // only if we had problem intervals at all, we need to check if the last one needs to be closed + lp = problems.get(name); + p = lp.get(lp.size()-1); + if ( p.second == -1 ) p.second = cycle - 1; + } + } + } + + + static class CycleStats { + private long readCount = 0; + private double[] cycleQualsAv = null; + private double[] cycleQualsSd = null; + private int minL = 1000000000; // read min. length + private int maxL = 0; // read max. length + + public CycleStats(int N) { + readCount = 0; + cycleQualsAv = new double[N]; + cycleQualsSd = new double[N]; + } + + public void add(byte[] quals) { + if ( quals.length > cycleQualsAv.length ) + throw new UserException("A read of length "+quals.length+" encountered, which exceeds specified maximum read length"); + if ( quals.length > maxL ) maxL = quals.length; + if ( quals.length < minL ) minL = quals.length; + readCount++; + for ( int i = 0 ; i < quals.length ; i++ ) { + // NOTE: in the update equaltions below, there is no need to check if readCount == 1 (i.e. + // we are initializing with the very first record) or not. Indeed, the arrays are initialized with + // 0; when the very first value arrives, readCount is 1 and cycleQuals[i] gets set to quals[i] (correct!); + // this will also make the second term in the update equation for Sd (quals[i]-cycleQualsAv[i]) equal + // to 0, so Sd will be initially set to 0. + double oldAvg = cycleQualsAv[i]; // save old mean, will need it for calculation of the variance + cycleQualsAv[i] += ( quals[i] - cycleQualsAv[i] ) / readCount; // update mean + cycleQualsSd[i] += ( quals[i] - oldAvg ) * ( quals[i] - cycleQualsAv[i] ); + } + } + + public long getReadCount() { return readCount; } + public int getMaxReadLength() { return maxL; } + public int getMinReadLength() { return minL; } +// long [] getCycleQualSums() { return cycleQuals; } +// long getCycleQualSum(int i) { return cycleQuals[i]; } + double getCycleQualAverage(int i) { return cycleQualsAv[i]; } + double getCycleQualStdDev(int i) { return Math.sqrt( cycleQualsSd[i]/(readCount-1) ); } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java index d3b992cb5..ac0b3e7d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/PrintLocusContextWalker.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.qc; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; import java.util.Arrays; @@ -40,7 +40,7 @@ public class PrintLocusContextWalker extends LocusWalker reads ) { + private String[] getReadNames( List reads ) { String[] readNames = new String[ reads.size() ]; for( int i = 0; i < reads.size(); i++ ) { readNames[i] = String.format("%nname = %s, start = %d, end = %d", reads.get(i).getReadName(), reads.get(i).getAlignmentStart(), reads.get(i).getAlignmentEnd()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java index 908e389a8..27f9d7b6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadClippingStatsWalker.java @@ -1,142 +1,142 @@ -/* - * Copyright (c) 2009 The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.qc; - -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; - -import java.io.PrintStream; -import java.util.Arrays; - -/** - * User: depristo - * Date: May 5, 2010 - * Time: 12:16:41 PM - */ - -/** - * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length - * of the clipping to the output stream. - */ -@Requires({DataSource.READS}) -public class ReadClippingStatsWalker extends ReadWalker { - @Output - protected PrintStream out; - - @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+ - "on mapped reads only, while unmapped reads will be discarded", required=false) - protected boolean MAPPED_ONLY = true; - - @Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false) - protected int SKIP = 1; - -// public void initialize() { -// -// } - - public class ReadClippingInfo { - SAMReadGroupRecord rg; - int readLength, nClippingEvents, nClippedBases; - } - - public ReadClippingInfo map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { - if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY) - return null; - - ReadClippingInfo info = new ReadClippingInfo(); - info.rg = read.getReadGroup(); - - if ( info.rg == null ) throw new UserException.ReadMissingReadGroup(read); - - for ( CigarElement elt : read.getCigar().getCigarElements() ) { - if ( elt.getOperator() != CigarOperator.N ) - - switch ( elt.getOperator()) { - case H : // ignore hard clips - case S : // soft clip - info.nClippingEvents++; - info.nClippedBases += elt.getLength(); - // note the fall through here - case M : - case D : // deletion w.r.t. the reference - case P : // ignore pads - case I : // insertion w.r.t. the reference - info.readLength += elt.getLength(); // Unless we have a reference skip, the read gets longer - break; - case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning) - break; - default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + elt.getOperator()); - } - } - - return info; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ - public Integer reduceInit() { - out.println(Utils.join(" \t", Arrays.asList("ReadGroup", "ReadLength", "NClippingEvents", "NClippedBases", "PercentClipped"))); - return 0; - } - - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param info result of the map. - * @param sum accumulator for the reduce. - * @return accumulator with result of the map taken into account. - */ - public Integer reduce(ReadClippingInfo info, Integer sum) { - if ( info != null ) { - if ( sum % SKIP == 0 ) { - String id = info.rg.getReadGroupId(); - out.printf("%s\t %d\t %d\t %d\t %.2f%n", - id, info.readLength, info.nClippingEvents, info.nClippedBases, - 100.0 * MathUtils.ratio(info.nClippedBases, info.readLength)); - } - return sum + 1; //To change body of implemented methods use File | Settings | File Templates. - } else { - return sum; - } - } - - public void onTraversalDone(Integer result) { - - } +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.io.PrintStream; +import java.util.Arrays; + +/** + * User: depristo + * Date: May 5, 2010 + * Time: 12:16:41 PM + */ + +/** + * Walks over the input reads, printing out statistics about the read length, number of clipping events, and length + * of the clipping to the output stream. + */ +@Requires({DataSource.READS}) +public class ReadClippingStatsWalker extends ReadWalker { + @Output + protected PrintStream out; + + @Argument(fullName="mappedOnly", shortName="mo", doc="when this flag is set (default), statistics will be collected "+ + "on mapped reads only, while unmapped reads will be discarded", required=false) + protected boolean MAPPED_ONLY = true; + + @Argument(fullName="skip", shortName="skip", doc="When provided, only every skip reads are analyzed", required=false) + protected int SKIP = 1; + +// public void initialize() { +// +// } + + public class ReadClippingInfo { + SAMReadGroupRecord rg; + int readLength, nClippingEvents, nClippedBases; + } + + public ReadClippingInfo map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { + if ( AlignmentUtils.isReadUnmapped(read) && MAPPED_ONLY) + return null; + + ReadClippingInfo info = new ReadClippingInfo(); + info.rg = read.getReadGroup(); + + if ( info.rg == null ) throw new UserException.ReadMissingReadGroup(read); + + for ( CigarElement elt : read.getCigar().getCigarElements() ) { + if ( elt.getOperator() != CigarOperator.N ) + + switch ( elt.getOperator()) { + case H : // ignore hard clips + case S : // soft clip + info.nClippingEvents++; + info.nClippedBases += elt.getLength(); + // note the fall through here + case M : + case D : // deletion w.r.t. the reference + case P : // ignore pads + case I : // insertion w.r.t. the reference + info.readLength += elt.getLength(); // Unless we have a reference skip, the read gets longer + break; + case N : // reference skip (looks and gets processed just like a "deletion", just different logical meaning) + break; + default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + elt.getOperator()); + } + } + + return info; //To change body of implemented methods use File | Settings | File Templates. + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + public Integer reduceInit() { + out.println(Utils.join(" \t", Arrays.asList("ReadGroup", "ReadLength", "NClippingEvents", "NClippedBases", "PercentClipped"))); + return 0; + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param info result of the map. + * @param sum accumulator for the reduce. + * @return accumulator with result of the map taken into account. + */ + public Integer reduce(ReadClippingInfo info, Integer sum) { + if ( info != null ) { + if ( sum % SKIP == 0 ) { + String id = info.rg.getReadGroupId(); + out.printf("%s\t %d\t %d\t %d\t %.2f%n", + id, info.readLength, info.nClippingEvents, info.nClippedBases, + 100.0 * MathUtils.ratio(info.nClippedBases, info.readLength)); + } + return sum + 1; //To change body of implemented methods use File | Settings | File Templates. + } else { + return sum; + } + } + + public void onTraversalDone(Integer result) { + + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java index fa1bb4d55..4425f92c4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -64,21 +65,23 @@ public class ReadValidationWalker extends ReadWalker { /** * The reads filter function. + * * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return true if the read passes the filter, false if it doesn't */ - public boolean filter(ReferenceContext ref, SAMRecord read) { + public boolean filter(ReferenceContext ref, GATKSAMRecord read) { return true; } /** * The reads map function. + * * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return the read itself */ - public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { + public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) { return read; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index e04f5bc4b..1ce02a3cf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -364,11 +364,12 @@ public class TableRecalibrationWalker extends ReadWalker ops = null; @@ -23,7 +23,7 @@ public class ReadClipper { * * @param read */ - public ReadClipper(final SAMRecord read) { + public ReadClipper(final GATKSAMRecord read) { this.read = read; this.wasClipped = false; } @@ -46,19 +46,19 @@ public class ReadClipper { return wasClipped; } - public SAMRecord getRead() { + public GATKSAMRecord getRead() { return read; } - public SAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) { + public GATKSAMRecord hardClipByReferenceCoordinatesLeftTail(int refStop) { return hardClipByReferenceCoordinates(-1, refStop); } - public SAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) { + public GATKSAMRecord hardClipByReferenceCoordinatesRightTail(int refStart) { return hardClipByReferenceCoordinates(refStart, -1); } - private int numDeletions(SAMRecord read) { + private int numDeletions(GATKSAMRecord read) { int result = 0; for (CigarElement e: read.getCigar().getCigarElements()) { if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D ) @@ -67,7 +67,7 @@ public class ReadClipper { return result; } - protected SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { + protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); @@ -78,32 +78,32 @@ public class ReadClipper { throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); this.addOp(new ClippingOp(start, stop)); - SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); + GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); this.ops = null; return clippedRead; } - public SAMRecord hardClipByReadCoordinates(int start, int stop) { + public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); } @Requires("left <= right") - public SAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { + public GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (left == right) - return new SAMRecord(read.getHeader()); - SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); + return new GATKSAMRecord(read.getHeader()); + GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) - return new SAMRecord(read.getHeader()); + return new GATKSAMRecord(read.getHeader()); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); } - public SAMRecord hardClipLowQualEnds(byte lowQual) { + public GATKSAMRecord hardClipLowQualEnds(byte lowQual) { byte [] quals = read.getBaseQualities(); int leftClipIndex = 0; int rightClipIndex = read.getReadLength() - 1; @@ -114,7 +114,7 @@ public class ReadClipper { // if the entire read should be clipped, then return an empty read. (--todo: maybe null is better? testing this for now) if (leftClipIndex > rightClipIndex) - return (new SAMRecord(read.getHeader())); + return (new GATKSAMRecord(read.getHeader())); if (rightClipIndex < read.getReadLength() - 1) { this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1)); @@ -125,7 +125,7 @@ public class ReadClipper { return this.clipRead(ClippingRepresentation.HARDCLIP_BASES); } - public SAMRecord hardClipSoftClippedBases () { + public GATKSAMRecord hardClipSoftClippedBases () { int readIndex = 0; int cutLeft = -1; // first position to hard clip (inclusive) int cutRight = -1; // first position to hard clip (inclusive) @@ -164,12 +164,12 @@ public class ReadClipper { * @param algorithm * @return */ - public SAMRecord clipRead(ClippingRepresentation algorithm) { + public GATKSAMRecord clipRead(ClippingRepresentation algorithm) { if (ops == null) return getRead(); else { try { - SAMRecord clippedRead = (SAMRecord) read.clone(); + GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone(); for (ClippingOp op : getOps()) { clippedRead = op.apply(algorithm, clippedRead); } @@ -181,7 +181,7 @@ public class ReadClipper { } } - public SAMRecord hardClipLeadingInsertions() { + public GATKSAMRecord hardClipLeadingInsertions() { for(CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION) diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index bba47c76c..7ae575534 100644 --- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.utils.duplicates; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -35,27 +34,28 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Arrays; import java.util.List; public class DupUtils { - private static SAMRecord tmpCopyRead(SAMRecord read) { + private static GATKSAMRecord tmpCopyRead(GATKSAMRecord read) { try { - return (SAMRecord)read.clone(); + return (GATKSAMRecord)read.clone(); } catch ( CloneNotSupportedException e ) { throw new ReviewedStingException("Unexpected Clone failure!"); } } - public static SAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List duplicates, int maxQScore) { + public static GATKSAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List duplicates, int maxQScore) { if ( duplicates.size() == 0 ) return null; // make the combined read by copying the first read and setting the // bases and quals to new arrays - SAMRecord comb = tmpCopyRead(duplicates.get(0)); - //SAMRecord comb = tmpCopyRead(duplicates.get(0)); + GATKSAMRecord comb = tmpCopyRead(duplicates.get(0)); + //GATKSAMRecord comb = tmpCopyRead(duplicates.get(0)); comb.setDuplicateReadFlag(false); int readLen = comb.getReadBases().length; byte[] bases = new byte[readLen]; @@ -63,7 +63,7 @@ public class DupUtils { for ( int i = 0; i < readLen; i++ ) { //System.out.printf("I is %d%n", i); - //for ( SAMRecord read : duplicates ) { + //for ( GATKSAMRecord read : duplicates ) { // System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]); //} Pair baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore); @@ -117,7 +117,7 @@ public class DupUtils { System.out.printf("%n"); } - private static Pair combineBaseProbs(GenomeLocParser genomeLocParser,List duplicates, int readOffset, int maxQScore) { + private static Pair combineBaseProbs(GenomeLocParser genomeLocParser,List duplicates, int readOffset, int maxQScore) { GenomeLoc loc = genomeLocParser.createGenomeLoc(duplicates.get(0)); ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset); diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 723830a91..e5500ca21 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.fragments; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; @@ -35,17 +36,17 @@ public class FragmentUtils { * @param */ public interface ReadGetter { - public SAMRecord get(T object); + public GATKSAMRecord get(T object); } /** Identify getter for SAMRecords themselves */ - private final static ReadGetter SamRecordGetter = new ReadGetter() { - @Override public SAMRecord get(final SAMRecord object) { return object; } + private final static ReadGetter SamRecordGetter = new ReadGetter() { + @Override public GATKSAMRecord get(final GATKSAMRecord object) { return object; } }; /** Gets the SAMRecord in a PileupElement */ private final static ReadGetter PileupElementGetter = new ReadGetter() { - @Override public SAMRecord get(final PileupElement object) { return object.getRead(); } + @Override public GATKSAMRecord get(final PileupElement object) { return object.getRead(); } }; @@ -116,7 +117,7 @@ public class FragmentUtils { return create(rbp, rbp.getNumberOfElements(), PileupElementGetter); } - public final static FragmentCollection create(List reads) { + public final static FragmentCollection create(List reads) { return create(reads, reads.size(), SamRecordGetter); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 4db2f974f..18051ce92 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -24,13 +24,13 @@ package org.broadinstitute.sting.utils.pileup; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.fragments.FragmentCollection; import org.broadinstitute.sting.utils.fragments.FragmentUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; @@ -59,12 +59,12 @@ public abstract class AbstractReadBackedPileup reads, List offsets ) { + public AbstractReadBackedPileup(GenomeLoc loc, List reads, List offsets ) { this.loc = loc; this.pileupElementTracker = readsOffsets2Pileup(reads,offsets); } - public AbstractReadBackedPileup(GenomeLoc loc, List reads, int offset ) { + public AbstractReadBackedPileup(GenomeLoc loc, List reads, int offset ) { this.loc = loc; this.pileupElementTracker = readsOffsets2Pileup(reads,offset); } @@ -167,7 +167,7 @@ public abstract class AbstractReadBackedPileup readsOffsets2Pileup(List reads, List offsets ) { + private PileupElementTracker readsOffsets2Pileup(List reads, List offsets ) { if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); if ( offsets == null ) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup"); if ( reads.size() != offsets.size() ) throw new ReviewedStingException("Reads and offset lists have different sizes!"); @@ -187,7 +187,7 @@ public abstract class AbstractReadBackedPileup readsOffsets2Pileup(List reads, int offset ) { + private PileupElementTracker readsOffsets2Pileup(List reads, int offset ) { if ( reads == null ) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); if ( offset < 0 ) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup"); @@ -200,7 +200,7 @@ public abstract class AbstractReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker); - protected abstract PE createNewPileupElement(SAMRecord read, int offset); + protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset); // -------------------------------------------------------- // @@ -512,7 +512,7 @@ public abstract class AbstractReadBackedPileup filteredTracker = new UnifiedPileupElementTracker(); for(PE p: pileupElementTracker) { - SAMRecord read = p.getRead(); + GATKSAMRecord read = p.getRead(); if(targetReadGroupId != null) { if(read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId())) filteredTracker.add(p); @@ -543,7 +543,7 @@ public abstract class AbstractReadBackedPileup filteredTracker = new UnifiedPileupElementTracker(); for(PE p: pileupElementTracker) { - SAMRecord read = p.getRead(); + GATKSAMRecord read = p.getRead(); if(laneID != null) { if(read.getReadGroup() != null && (read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different @@ -567,7 +567,7 @@ public abstract class AbstractReadBackedPileup sampleNames = new HashSet(); for(PileupElement p: this) { - SAMRecord read = p.getRead(); + GATKSAMRecord read = p.getRead(); String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; sampleNames.add(sampleName); } @@ -644,7 +644,7 @@ public abstract class AbstractReadBackedPileup hashSampleNames = new HashSet(sampleNames); // to speed up the "contains" access in the for loop UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); for(PE p: pileupElementTracker) { - SAMRecord read = p.getRead(); + GATKSAMRecord read = p.getRead(); if(sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else. if(read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample())) filteredTracker.add(p); @@ -669,7 +669,7 @@ public abstract class AbstractReadBackedPileup filteredTracker = new UnifiedPileupElementTracker(); for(PE p: pileupElementTracker) { - SAMRecord read = p.getRead(); + GATKSAMRecord read = p.getRead(); if(sampleName != null) { if(read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) filteredTracker.add(p); @@ -824,8 +824,8 @@ public abstract class AbstractReadBackedPileup getReads() { - List reads = new ArrayList(getNumberOfElements()); + public List getReads() { + List reads = new ArrayList(getNumberOfElements()); for ( PileupElement pile : this ) { reads.add(pile.getRead()); } return reads; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index 26e66014c..1e5e4d4e5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -1,131 +1,132 @@ -package org.broadinstitute.sting.utils.pileup; - -import net.sf.samtools.SAMRecord; - -import java.util.Arrays; - -/** - * In the "standard" locus traversal mode, - * the traversal is performed striclty over the reference bases. Thus, only pileups of bases (and hence local events - * such as point mutations) are "seen" at every invocation of the walker's map() function at every (genomic) locus. Deletions - * are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted - * in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept. - * The insertions that may be present in some reads are not seen at all in such strict reference traversal mode. - * - * By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e. - * last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is - * an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice: - * first call will be performed in a "standard" mode, with a pileup of bases over the position Z, and then the additional - * call will be made at the same position with a pileup of event/noevent calls, where events are extended and contain - * full information about insertions/deletions. Then the next, "standard", call to map() will be performed at the next - * (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at - * Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed - * only once, at the position where the indel maps (starts). - * - * This class wraps an "extended" event (indel) so that in can be added to a pileup of events at a given location. - * - * Created by IntelliJ IDEA. - * User: asivache - * Date: Dec 21, 2009 - * Time: 2:57:55 PM - * To change this template use File | Settings | File Templates. - */ -public class ExtendedEventPileupElement extends PileupElement { - public enum Type { - NOEVENT, DELETION, INSERTION - } - - private Type type = null; - private int eventLength = -1; - private String eventBases = null; // if it is a deletion, we do not have information about the actual deleted bases - // in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases - private SAMRecord read; - private int offset; // position in the read immediately BEFORE the event - // This is broken! offset is always zero because these member variables are shadowed by base class - - /** Constructor for extended pileup element (indel). - * - * @param read the read, in which the indel is observed - * @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion) - * @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT) - * @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent) - */ - public ExtendedEventPileupElement( SAMRecord read, int offset, int length, byte[] eventBases ) { - super(read, offset); - this.eventLength = length; - if ( length <= 0 ) type = Type.NOEVENT; - else { - if ( eventBases != null ) { - this.eventBases = new String(eventBases).toUpperCase(); - type = Type.INSERTION; - } else { - type = Type.DELETION; - } - } - } - - /** Constructor for deletion or noevent calls - does not take event bases as an argument (as those should - * be null or are ignored in these cases anyway) - * @param read - * @param offset - * @param length - */ - public ExtendedEventPileupElement( SAMRecord read, int offset, int length ) { - this(read,offset, length, null); - } - - public boolean isDeletion() { - return type == Type.DELETION; - } - - public boolean isInsertion() { - return type == Type.INSERTION; - } - - public boolean isIndel() { - return isDeletion() || isInsertion(); - } - - public Type getType() { return type; } - - // The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with - // a valid base quality. The following code attempts to compensate for that.' - - @Override - public byte getBase() { - return getBase(offset >= 0 ? offset : offset+eventLength); - } - - @Override - public int getBaseIndex() { - return getBaseIndex(offset >= 0 ? offset : offset+eventLength); - } - - @Override - public byte getQual() { - return getQual(offset >= 0 ? offset : offset+eventLength); - } - - /** Returns length of the event (number of inserted or deleted bases */ - public int getEventLength() { return eventLength; } - - /** Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read. - * */ - public String getEventBases() { return eventBases; } - - @Override - public String toString() { - char c = '.'; - String fillStr = null ; - if ( isDeletion() ) { - c = '-'; - char [] filler = new char[eventLength]; - Arrays.fill(filler, 'D'); - fillStr = new String(filler); - } - else if ( isInsertion() ) c = '+'; - return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()? - (isInsertion() ? eventBases : fillStr ): "", getMappingQual()); - } - -} +package org.broadinstitute.sting.utils.pileup; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; + +import java.util.Arrays; + +/** + * In the "standard" locus traversal mode, + * the traversal is performed striclty over the reference bases. Thus, only pileups of bases (and hence local events + * such as point mutations) are "seen" at every invocation of the walker's map() function at every (genomic) locus. Deletions + * are seen on the base-by-base basis (i.e. the pileup does keep the information about the current reference base being deleted + * in some reads), but the information about the extended event (deletion length, string of all deleted bases) is not kept. + * The insertions that may be present in some reads are not seen at all in such strict reference traversal mode. + * + * By convention, any extended event (indel) is mapped onto the reference at the last base prior to the event (i.e. + * last base before the insertion or deletion). If the special "extended" traversal mode is turned on and there is + * an indel in at least one read that maps onto the reference position Z, the walker's map function will be called twice: + * first call will be performed in a "standard" mode, with a pileup of bases over the position Z, and then the additional + * call will be made at the same position with a pileup of event/noevent calls, where events are extended and contain + * full information about insertions/deletions. Then the next, "standard", call to map() will be performed at the next + * (covered) reference position. Note that if the extended event at Z was a deletion, the "standard" base pileup at + * Z+1 and following bases may still contain deleted bases. However the fully extended event call will be performed + * only once, at the position where the indel maps (starts). + * + * This class wraps an "extended" event (indel) so that in can be added to a pileup of events at a given location. + * + * Created by IntelliJ IDEA. + * User: asivache + * Date: Dec 21, 2009 + * Time: 2:57:55 PM + * To change this template use File | Settings | File Templates. + */ +public class ExtendedEventPileupElement extends PileupElement { + public enum Type { + NOEVENT, DELETION, INSERTION + } + + private Type type = null; + private int eventLength = -1; + private String eventBases = null; // if it is a deletion, we do not have information about the actual deleted bases + // in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases + private SAMRecord read; + private int offset; // position in the read immediately BEFORE the event + // This is broken! offset is always zero because these member variables are shadowed by base class + + /** Constructor for extended pileup element (indel). + * + * @param read the read, in which the indel is observed + * @param offset position in the read immediately before the indel (can be -1 if read starts with an insertion) + * @param length length of the indel (number of inserted or deleted bases); length <=0 indicates that the read has no indel (NOEVENT) + * @param eventBases inserted bases. null indicates that the event is a deletion; ignored if length<=0 (noevent) + */ + public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length, byte[] eventBases ) { + super(read, offset); + this.eventLength = length; + if ( length <= 0 ) type = Type.NOEVENT; + else { + if ( eventBases != null ) { + this.eventBases = new String(eventBases).toUpperCase(); + type = Type.INSERTION; + } else { + type = Type.DELETION; + } + } + } + + /** Constructor for deletion or noevent calls - does not take event bases as an argument (as those should + * be null or are ignored in these cases anyway) + * @param read + * @param offset + * @param length + */ + public ExtendedEventPileupElement( GATKSAMRecord read, int offset, int length ) { + this(read,offset, length, null); + } + + public boolean isDeletion() { + return type == Type.DELETION; + } + + public boolean isInsertion() { + return type == Type.INSERTION; + } + + public boolean isIndel() { + return isDeletion() || isInsertion(); + } + + public Type getType() { return type; } + + // The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with + // a valid base quality. The following code attempts to compensate for that.' + + @Override + public byte getBase() { + return getBase(offset >= 0 ? offset : offset+eventLength); + } + + @Override + public int getBaseIndex() { + return getBaseIndex(offset >= 0 ? offset : offset+eventLength); + } + + @Override + public byte getQual() { + return getQual(offset >= 0 ? offset : offset+eventLength); + } + + /** Returns length of the event (number of inserted or deleted bases */ + public int getEventLength() { return eventLength; } + + /** Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read. + * */ + public String getEventBases() { return eventBases; } + + @Override + public String toString() { + char c = '.'; + String fillStr = null ; + if ( isDeletion() ) { + c = '-'; + char [] filler = new char[eventLength]; + Arrays.fill(filler, 'D'); + fillStr = new String(filler); + } + else if ( isInsertion() ) c = '+'; + return String.format("%s @ %d = %c%s MQ%d", getRead().getReadName(), getOffset(), c, isIndel()? + (isInsertion() ? eventBases : fillStr ): "", getMappingQual()); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index df6c6d80b..daf6606ef 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -2,10 +2,8 @@ package org.broadinstitute.sting.utils.pileup; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; /** * Created by IntelliJ IDEA. @@ -21,14 +19,14 @@ public class PileupElement implements Comparable { public static final byte T_FOLLOWED_BY_INSERTION_BASE = (byte) 89; public static final byte G_FOLLOWED_BY_INSERTION_BASE = (byte) 90; - protected final SAMRecord read; + protected final GATKSAMRecord read; protected final int offset; @Requires({ "read != null", "offset >= -1", "offset <= read.getReadLength()"}) - public PileupElement( SAMRecord read, int offset ) { + public PileupElement( GATKSAMRecord read, int offset ) { this.read = read; this.offset = offset; } @@ -38,7 +36,7 @@ public class PileupElement implements Comparable { } @Ensures("result != null") - public SAMRecord getRead() { return read; } + public GATKSAMRecord getRead() { return read; } @Ensures("result == offset") public int getOffset() { return offset; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java index 4205ff5af..3d872f9fb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileup.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.pileup; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Collection; import java.util.List; @@ -166,7 +167,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup { * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time * @return */ - public List getReads(); + public List getReads(); /** * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java index 8910487b8..43ad06352 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -23,10 +23,10 @@ */ package org.broadinstitute.sting.utils.pileup; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.*; @@ -95,7 +95,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< } @Override - protected ExtendedEventPileupElement createNewPileupElement(SAMRecord read, int offset) { + protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset) { throw new UnsupportedOperationException("Not enough information provided to create a new pileup element"); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 9cf62c173..02767df7c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -24,10 +24,10 @@ package org.broadinstitute.sting.utils.pileup; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.fragments.FragmentCollection; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Collection; import java.util.List; @@ -202,7 +202,7 @@ public interface ReadBackedPileup extends Iterable, HasGenomeLoca * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time * @return */ - public List getReads(); + public List getReads(); /** * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 18e6d9134..b7445be8d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -23,8 +23,8 @@ */ package org.broadinstitute.sting.utils.pileup; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.List; import java.util.Map; @@ -35,11 +35,11 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup reads, List offsets ) { + public ReadBackedPileupImpl(GenomeLoc loc, List reads, List offsets ) { super(loc,reads,offsets); } - public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset ) { + public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset ) { super(loc,reads,offset); } @@ -70,7 +70,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup extends TraversalEngine createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { - SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen); - SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen); + public final static List createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { + GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen); + GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen); left.setReadPairedFlag(true); right.setReadPairedFlag(true); @@ -327,9 +326,9 @@ public class ArtificialSAMUtils { if ( rightStart <= 0 ) continue; - List pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); - final SAMRecord left = pair.get(0); - final SAMRecord right = pair.get(1); + List pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + final GATKSAMRecord left = pair.get(0); + final GATKSAMRecord right = pair.get(1); pileupElements.add(new PileupElement(left, pos - leftStart)); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 9372d6478..e125b8c80 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -188,15 +188,15 @@ public class ReadUtils { * This makes the following code a little nasty, since we can only detect if a base is in the adaptor, but not * if it overlaps the read. * - * @param rec + * @param read * @param basePos * @param adaptorLength * @return */ - public static OverlapType readPairBaseOverlapType(final SAMRecord rec, long basePos, final int adaptorLength) { + public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos, final int adaptorLength) { OverlapType state = OverlapType.NOT_OVERLAPPING; - Pair adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength); + Pair adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength); if ( adaptorBoundaries != null ) { // we're not an unmapped pair -- cannot filter out @@ -205,28 +205,28 @@ public class ReadUtils { if ( inAdapator ) { state = OverlapType.IN_ADAPTOR; //System.out.printf("baseOverlapState: %50s negStrand=%b base=%d start=%d stop=%d, adaptorStart=%d adaptorEnd=%d isize=%d => %s%n", - // rec.getReadName(), rec.getReadNegativeStrandFlag(), basePos, rec.getAlignmentStart(), rec.getAlignmentEnd(), adaptorBoundaries.first, adaptorBoundaries.second, rec.getInferredInsertSize(), state); + // read.getReadName(), read.getReadNegativeStrandFlag(), basePos, read.getAlignmentStart(), read.getAlignmentEnd(), adaptorBoundaries.first, adaptorBoundaries.second, read.getInferredInsertSize(), state); } } return state; } - private static Pair getAdaptorBoundaries(SAMRecord rec, int adaptorLength) { - int isize = rec.getInferredInsertSize(); + private static Pair getAdaptorBoundaries(SAMRecord read, int adaptorLength) { + int isize = read.getInferredInsertSize(); if ( isize == 0 ) return null; // don't worry about unmapped pairs int adaptorStart, adaptorEnd; - if ( rec.getReadNegativeStrandFlag() ) { + if ( read.getReadNegativeStrandFlag() ) { // we are on the negative strand, so our mate is on the positive strand - int mateStart = rec.getMateAlignmentStart(); + int mateStart = read.getMateAlignmentStart(); adaptorStart = mateStart - adaptorLength - 1; adaptorEnd = mateStart - 1; } else { // we are on the positive strand, so our mate is on the negative strand - int mateEnd = rec.getAlignmentStart() + isize - 1; + int mateEnd = read.getAlignmentStart() + isize - 1; adaptorStart = mateEnd + 1; adaptorEnd = mateEnd + adaptorLength; } @@ -236,47 +236,47 @@ public class ReadUtils { /** * - * @param rec original SAM record + * @param read original SAM record * @param adaptorLength length of adaptor sequence * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped */ - public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec, int adaptorLength) { + public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read, int adaptorLength) { - Pair adaptorBoundaries = getAdaptorBoundaries(rec, adaptorLength); - GATKSAMRecord result = (GATKSAMRecord)rec; + Pair adaptorBoundaries = getAdaptorBoundaries(read, adaptorLength); + GATKSAMRecord result = (GATKSAMRecord)read; if ( adaptorBoundaries != null ) { - if ( rec.getReadNegativeStrandFlag() && adaptorBoundaries.second >= rec.getAlignmentStart() && adaptorBoundaries.first < rec.getAlignmentEnd() ) - result = hardClipStartOfRead(rec, adaptorBoundaries.second); - else if ( !rec.getReadNegativeStrandFlag() && adaptorBoundaries.first <= rec.getAlignmentEnd() ) - result = hardClipEndOfRead(rec, adaptorBoundaries.first); + if ( read.getReadNegativeStrandFlag() && adaptorBoundaries.second >= read.getAlignmentStart() && adaptorBoundaries.first < read.getAlignmentEnd() ) + result = hardClipStartOfRead(read, adaptorBoundaries.second); + else if ( !read.getReadNegativeStrandFlag() && adaptorBoundaries.first <= read.getAlignmentEnd() ) + result = hardClipEndOfRead(read, adaptorBoundaries.first); } return result; } // return true if the read needs to be completely clipped - private static GATKSAMRecord hardClipStartOfRead(SAMRecord oldRec, int stopPosition) { + private static GATKSAMRecord hardClipStartOfRead(GATKSAMRecord oldRec, int stopPosition) { if ( stopPosition >= oldRec.getAlignmentEnd() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it - //System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName()); + //System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName()); return null; } - GATKSAMRecord rec; + GATKSAMRecord read; try { - rec = (GATKSAMRecord)oldRec.clone(); + read = (GATKSAMRecord)oldRec.clone(); } catch (Exception e) { return null; } //System.out.printf("Clipping start of read: %50s start=%d adaptorEnd=%d isize=%d %n", - // rec.getReadName(), rec.getAlignmentStart(), stopPosition, rec.getInferredInsertSize()); + // read.getReadName(), read.getAlignmentStart(), stopPosition, read.getInferredInsertSize()); - Cigar oldCigar = rec.getCigar(); + Cigar oldCigar = read.getCigar(); LinkedList newCigarElements = new LinkedList(); - int currentPos = rec.getAlignmentStart(); + int currentPos = read.getAlignmentStart(); int basesToClip = 0; int basesAlreadyClipped = 0; @@ -315,48 +315,48 @@ public class ReadUtils { } // copy over the unclipped bases - final byte[] bases = rec.getReadBases(); - final byte[] quals = rec.getBaseQualities(); + final byte[] bases = read.getReadBases(); + final byte[] quals = read.getBaseQualities(); int newLength = bases.length - basesToClip; byte[] newBases = new byte[newLength]; byte[] newQuals = new byte[newLength]; System.arraycopy(bases, basesToClip, newBases, 0, newLength); System.arraycopy(quals, basesToClip, newQuals, 0, newLength); - rec.setReadBases(newBases); - rec.setBaseQualities(newQuals); + read.setReadBases(newBases); + read.setBaseQualities(newQuals); // now add a CIGAR element for the clipped bases newCigarElements.addFirst(new CigarElement(basesToClip + basesAlreadyClipped, CigarOperator.H)); Cigar newCigar = new Cigar(newCigarElements); - rec.setCigar(newCigar); + read.setCigar(newCigar); // adjust the start accordingly - rec.setAlignmentStart(stopPosition + 1); + read.setAlignmentStart(stopPosition + 1); - return rec; + return read; } - private static GATKSAMRecord hardClipEndOfRead(SAMRecord oldRec, int startPosition) { + private static GATKSAMRecord hardClipEndOfRead(GATKSAMRecord oldRec, int startPosition) { if ( startPosition <= oldRec.getAlignmentStart() ) { // BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it - //System.out.printf("Entire read needs to be clipped: %50s %n", rec.getReadName()); + //System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName()); return null; } - GATKSAMRecord rec; + GATKSAMRecord read; try { - rec = (GATKSAMRecord)oldRec.clone(); + read = (GATKSAMRecord)oldRec.clone(); } catch (Exception e) { return null; } //System.out.printf("Clipping end of read: %50s adaptorStart=%d end=%d isize=%d %n", - // rec.getReadName(), startPosition, rec.getAlignmentEnd(), rec.getInferredInsertSize()); + // read.getReadName(), startPosition, read.getAlignmentEnd(), read.getInferredInsertSize()); - Cigar oldCigar = rec.getCigar(); + Cigar oldCigar = read.getCigar(); LinkedList newCigarElements = new LinkedList(); - int currentPos = rec.getAlignmentStart(); + int currentPos = read.getAlignmentStart(); int basesToKeep = 0; int basesAlreadyClipped = 0; @@ -402,41 +402,41 @@ public class ReadUtils { } // copy over the unclipped bases - final byte[] bases = rec.getReadBases(); - final byte[] quals = rec.getBaseQualities(); + final byte[] bases = read.getReadBases(); + final byte[] quals = read.getBaseQualities(); byte[] newBases = new byte[basesToKeep]; byte[] newQuals = new byte[basesToKeep]; System.arraycopy(bases, 0, newBases, 0, basesToKeep); System.arraycopy(quals, 0, newQuals, 0, basesToKeep); - rec.setReadBases(newBases); - rec.setBaseQualities(newQuals); + read.setReadBases(newBases); + read.setBaseQualities(newQuals); // now add a CIGAR element for the clipped bases newCigarElements.add(new CigarElement((bases.length - basesToKeep) + basesAlreadyClipped, CigarOperator.H)); Cigar newCigar = new Cigar(newCigarElements); - rec.setCigar(newCigar); + read.setCigar(newCigar); // adjust the stop accordingly - // rec.setAlignmentEnd(startPosition - 1); + // read.setAlignmentEnd(startPosition - 1); - return rec; + return read; } /** * Hard clips away (i.e.g, removes from the read) bases that were previously soft clipped. * - * @param rec + * @param read * @return */ - @Requires("rec != null") + @Requires("read != null") @Ensures("result != null") - public static SAMRecord hardClipSoftClippedBases(SAMRecord rec) { - List cigarElts = rec.getCigar().getCigarElements(); + public static GATKSAMRecord hardClipSoftClippedBases(GATKSAMRecord read) { + List cigarElts = read.getCigar().getCigarElements(); if ( cigarElts.size() == 1 ) // can't be soft clipped, just return - return rec; + return read; - int keepStart = 0, keepEnd = rec.getReadLength() - 1; + int keepStart = 0, keepEnd = read.getReadLength() - 1; List newCigarElements = new LinkedList(); for ( int i = 0; i < cigarElts.size(); i++ ) { @@ -447,7 +447,7 @@ public class ReadUtils { if ( i == 0 ) keepStart = l; else - keepEnd = rec.getReadLength() - l - 1; + keepEnd = read.getReadLength() - l - 1; newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP)); break; @@ -477,54 +477,54 @@ public class ReadUtils { } mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator)); - return hardClipBases(rec, keepStart, keepEnd, mergedCigarElements); + return hardClipBases(read, keepStart, keepEnd, mergedCigarElements); } /** - * Hard clips out the bases in rec, keeping the bases from keepStart to keepEnd, inclusive. Note these + * Hard clips out the bases in read, keeping the bases from keepStart to keepEnd, inclusive. Note these * are offsets, so they are 0 based * - * @param rec + * @param read * @param keepStart * @param keepEnd * @param newCigarElements * @return */ @Requires({ - "rec != null", + "read != null", "keepStart >= 0", - "keepEnd < rec.getReadLength()", - "rec.getReadUnmappedFlag() || newCigarElements != null"}) + "keepEnd < read.getReadLength()", + "read.getReadUnmappedFlag() || newCigarElements != null"}) @Ensures("result != null") - public static SAMRecord hardClipBases(SAMRecord rec, int keepStart, int keepEnd, List newCigarElements) { + public static GATKSAMRecord hardClipBases(GATKSAMRecord read, int keepStart, int keepEnd, List newCigarElements) { int newLength = keepEnd - keepStart + 1; - if ( newLength != rec.getReadLength() ) { + if ( newLength != read.getReadLength() ) { try { - rec = SimplifyingSAMFileWriter.simplifyRead((SAMRecord)rec.clone()); + read = (GATKSAMRecord)read.clone(); // copy over the unclipped bases - final byte[] bases = rec.getReadBases(); - final byte[] quals = rec.getBaseQualities(); + final byte[] bases = read.getReadBases(); + final byte[] quals = read.getBaseQualities(); byte[] newBases = new byte[newLength]; byte[] newQuals = new byte[newLength]; System.arraycopy(bases, keepStart, newBases, 0, newLength); System.arraycopy(quals, keepStart, newQuals, 0, newLength); - rec.setReadBases(newBases); - rec.setBaseQualities(newQuals); + read.setReadBases(newBases); + read.setBaseQualities(newQuals); // now add a CIGAR element for the clipped bases, if the read isn't unmapped - if ( ! rec.getReadUnmappedFlag() ) { + if ( ! read.getReadUnmappedFlag() ) { Cigar newCigar = new Cigar(newCigarElements); - rec.setCigar(newCigar); + read.setCigar(newCigar); } } catch ( CloneNotSupportedException e ) { throw new ReviewedStingException("WTF, where did clone go?", e); } } - return rec; + return read; } - public static SAMRecord replaceSoftClipsWithMatches(SAMRecord read) { + public static GATKSAMRecord replaceSoftClipsWithMatches(GATKSAMRecord read) { List newCigarElements = new ArrayList(); for ( CigarElement ce : read.getCigar().getCigarElements() ) { @@ -561,15 +561,15 @@ public class ReadUtils { /** * - * @param rec original SAM record + * @param read original SAM record * @return a new read with adaptor sequence hard-clipped out or null if read is fully clipped */ - public static GATKSAMRecord hardClipAdaptorSequence(final SAMRecord rec) { - return hardClipAdaptorSequence(rec, DEFAULT_ADAPTOR_SIZE); + public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read) { + return hardClipAdaptorSequence(read, DEFAULT_ADAPTOR_SIZE); } - public static OverlapType readPairBaseOverlapType(final SAMRecord rec, long basePos) { - return readPairBaseOverlapType(rec, basePos, DEFAULT_ADAPTOR_SIZE); + public static OverlapType readPairBaseOverlapType(final SAMRecord read, long basePos) { + return readPairBaseOverlapType(read, basePos, DEFAULT_ADAPTOR_SIZE); } public static boolean is454Read(SAMRecord read) { @@ -601,10 +601,10 @@ public class ReadUtils { readFlagNames.put(0x400, "Duplicate"); } - public static String readFlagsAsString(SAMRecord rec) { + public static String readFlagsAsString(GATKSAMRecord read) { String flags = ""; for (int flag : readFlagNames.keySet()) { - if ((rec.getFlags() & flag) != 0) { + if ((read.getFlags() & flag) != 0) { flags += readFlagNames.get(flag) + " "; } } @@ -618,7 +618,7 @@ public class ReadUtils { * @param reads * @return */ - public final static List coordinateSortReads(List reads) { + public final static List coordinateSortReads(List reads) { final SAMRecordComparator comparer = new SAMRecordCoordinateComparator(); Collections.sort(reads, comparer); return reads; @@ -647,7 +647,7 @@ public class ReadUtils { * @param interval the interval * @return the overlap type as described by ReadAndIntervalOverlap enum (see above) */ - public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) { + public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(GATKSAMRecord read, GenomeLoc interval) { int sStart = getRefCoordSoftUnclippedStart(read); int sStop = getRefCoordSoftUnclippedEnd(read); @@ -685,7 +685,7 @@ public class ReadUtils { } @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) - public static int getRefCoordSoftUnclippedStart(SAMRecord read) { + public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) { int start = read.getUnclippedStart(); for (CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.HARD_CLIP) @@ -697,7 +697,7 @@ public class ReadUtils { } @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) - public static int getRefCoordSoftUnclippedEnd(SAMRecord read) { + public static int getRefCoordSoftUnclippedEnd(GATKSAMRecord read) { int stop = read.getUnclippedStart(); if (readIsEntirelyInsertion(read)) @@ -716,7 +716,7 @@ public class ReadUtils { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } - private static boolean readIsEntirelyInsertion(SAMRecord read) { + private static boolean readIsEntirelyInsertion(GATKSAMRecord read) { for (CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() != CigarOperator.INSERTION) return false; @@ -730,7 +730,7 @@ public class ReadUtils { } /** - * Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in + * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in * a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns * the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the * deletion. @@ -742,7 +742,7 @@ public class ReadUtils { */ @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) - public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) { + public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) { Pair result = getReadCoordinateForReferenceCoordinate(read, refCoord); int readCoord = result.getFirst(); @@ -760,7 +760,7 @@ public class ReadUtils { * Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with * a deletion. * - * SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a + * SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a * pre-processed result according to normal clipping needs. Or you can use this function and tailor the * behavior to your needs. * @@ -770,7 +770,7 @@ public class ReadUtils { */ @Requires({"refCoord >= getRefCoordSoftUnclippedStart(read)", "refCoord <= getRefCoordSoftUnclippedEnd(read)"}) @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"}) - public static Pair getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { + public static Pair getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) { int readBases = 0; int refBases = 0; boolean fallsInsideDeletion = false; @@ -851,13 +851,13 @@ public class ReadUtils { return new Pair(readBases, fallsInsideDeletion); } - public static SAMRecord unclipSoftClippedBases(SAMRecord rec) { - int newReadStart = rec.getAlignmentStart(); - int newReadEnd = rec.getAlignmentEnd(); - List newCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + public static GATKSAMRecord unclipSoftClippedBases(GATKSAMRecord read) { + int newReadStart = read.getAlignmentStart(); + int newReadEnd = read.getAlignmentEnd(); + List newCigarElements = new ArrayList(read.getCigar().getCigarElements().size()); int heldOver = -1; boolean sSeen = false; - for ( CigarElement e : rec.getCigar().getCigarElements() ) { + for ( CigarElement e : read.getCigar().getCigarElements() ) { if ( e.getOperator().equals(CigarOperator.S) ) { newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M)); if ( sSeen ) { @@ -872,7 +872,7 @@ public class ReadUtils { } // merge duplicate operators together int idx = 0; - List finalCigarElements = new ArrayList(rec.getCigar().getCigarElements().size()); + List finalCigarElements = new ArrayList(read.getCigar().getCigarElements().size()); while ( idx < newCigarElements.size() -1 ) { if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) { int combSize = newCigarElements.get(idx).getLength(); @@ -889,10 +889,10 @@ public class ReadUtils { idx++; } - rec.setCigar(new Cigar(finalCigarElements)); - rec.setAlignmentStart(newReadStart); + read.setCigar(new Cigar(finalCigarElements)); + read.setAlignmentStart(newReadStart); - return rec; + return read; } /** @@ -905,7 +905,7 @@ public class ReadUtils { @Requires({"read1 != null", "read2 != null"}) @Ensures("result == 0 || result == 1 || result == -1") - public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) { + public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) { AlignmentStartComparator comp = new AlignmentStartComparator(); return comp.compare(read1, read2); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java index 9807cede4..ecb865f0c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import net.sf.samtools.SAMRecord; import java.util.List; /** @@ -38,7 +37,7 @@ public class AllLocusViewUnitTest extends LocusViewTemplate { * @param reads */ @Override - protected void testReadsInContext( LocusView view, List range, List reads ) { + protected void testReadsInContext( LocusView view, List range, List reads ) { AllLocusView allLocusView = (AllLocusView)view; // TODO: Should skip over loci not in the given range. @@ -52,7 +51,7 @@ public class AllLocusViewUnitTest extends LocusViewTemplate { Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect"); int expectedReadsAtSite = 0; - for( SAMRecord read: reads ) { + for( GATKSAMRecord read: reads ) { if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) { Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads"); expectedReadsAtSite++; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java index 75716eae6..3a0caef51 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import net.sf.samtools.SAMRecord; import java.util.List; /** @@ -41,7 +41,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate { * @param reads */ @Override - protected void testReadsInContext( LocusView view, List range, List reads ) { + protected void testReadsInContext( LocusView view, List range, List reads ) { CoveredLocusView coveredLocusView = (CoveredLocusView)view; // TODO: Should skip over loci not in the given range. @@ -53,7 +53,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate { GenomeLoc site = genomeLocParser.createGenomeLoc("chr1",i); int expectedReadsAtSite = 0; - for( SAMRecord read: reads ) { + for( GATKSAMRecord read: reads ) { if( genomeLocParser.createGenomeLoc(read).containsP(site) ) expectedReadsAtSite++; } @@ -67,7 +67,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate { Assert.assertEquals(locusContext.getLocation(), site, "Target locus context location is incorrect"); Assert.assertEquals(locusContext.getReads().size(), expectedReadsAtSite, "Found wrong number of reads at site"); - for( SAMRecord read: reads ) { + for( GATKSAMRecord read: reads ) { if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads"); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 2adb4864c..8d7dd82ac 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -55,12 +56,12 @@ public abstract class LocusViewTemplate extends BaseTest { LocusView view = createView(dataProvider); - testReadsInContext(view, shard.getGenomeLocs(), Collections.emptyList()); + testReadsInContext(view, shard.getGenomeLocs(), Collections.emptyList()); } @Test public void singleReadTest() { - SAMRecord read = buildSAMRecord("chr1", 1, 5); + GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5); SAMRecordIterator iterator = new SAMRecordIterator(read); GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5); @@ -76,7 +77,7 @@ public abstract class LocusViewTemplate extends BaseTest { @Test public void readCoveringFirstPartTest() { - SAMRecord read = buildSAMRecord("chr1", 1, 5); + GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5); SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -90,7 +91,7 @@ public abstract class LocusViewTemplate extends BaseTest { @Test public void readCoveringLastPartTest() { - SAMRecord read = buildSAMRecord("chr1", 6, 10); + GATKSAMRecord read = buildSAMRecord("read1","chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -104,7 +105,7 @@ public abstract class LocusViewTemplate extends BaseTest { @Test public void readCoveringMiddleTest() { - SAMRecord read = buildSAMRecord("chr1", 3, 7); + GATKSAMRecord read = buildSAMRecord("read1","chr1", 3, 7); SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -118,7 +119,7 @@ public abstract class LocusViewTemplate extends BaseTest { @Test public void readAndLocusOverlapAtLastBase() { - SAMRecord read = buildSAMRecord("chr1", 1, 5); + GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 5); SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5))); @@ -132,7 +133,7 @@ public abstract class LocusViewTemplate extends BaseTest { @Test public void readOverlappingStartTest() { - SAMRecord read = buildSAMRecord("chr1", 1, 10); + GATKSAMRecord read = buildSAMRecord("read1","chr1", 1, 10); SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15))); @@ -146,7 +147,7 @@ public abstract class LocusViewTemplate extends BaseTest { @Test public void readOverlappingEndTest() { - SAMRecord read = buildSAMRecord("chr1", 6, 15); + GATKSAMRecord read = buildSAMRecord("read1","chr1", 6, 15); SAMRecordIterator iterator = new SAMRecordIterator(read); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -160,8 +161,8 @@ public abstract class LocusViewTemplate extends BaseTest { @Test public void readsSpanningTest() { - SAMRecord read1 = buildSAMRecord("chr1", 1, 5); - SAMRecord read2 = buildSAMRecord("chr1", 6, 10); + GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5); + GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -170,17 +171,17 @@ public abstract class LocusViewTemplate extends BaseTest { LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); - List expectedReads = new ArrayList(); + List expectedReads = new ArrayList(); Collections.addAll(expectedReads, read1, read2); testReadsInContext(view, shard.getGenomeLocs(), expectedReads); } @Test public void duplicateReadsTest() { - SAMRecord read1 = buildSAMRecord("chr1", 1, 5); - SAMRecord read2 = buildSAMRecord("chr1", 1, 5); - SAMRecord read3 = buildSAMRecord("chr1", 6, 10); - SAMRecord read4 = buildSAMRecord("chr1", 6, 10); + GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5); + GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 1, 5); + GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 6, 10); + GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -189,17 +190,17 @@ public abstract class LocusViewTemplate extends BaseTest { LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); - List expectedReads = new ArrayList(); + List expectedReads = new ArrayList(); Collections.addAll(expectedReads, read1, read2, read3, read4); testReadsInContext(view, shard.getGenomeLocs(), expectedReads); } @Test public void cascadingReadsWithinBoundsTest() { - SAMRecord read1 = buildSAMRecord("chr1", 2, 6); - SAMRecord read2 = buildSAMRecord("chr1", 3, 7); - SAMRecord read3 = buildSAMRecord("chr1", 4, 8); - SAMRecord read4 = buildSAMRecord("chr1", 5, 9); + GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 2, 6); + GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 3, 7); + GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 4, 8); + GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 5, 9); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -208,19 +209,19 @@ public abstract class LocusViewTemplate extends BaseTest { LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); - List expectedReads = new ArrayList(); + List expectedReads = new ArrayList(); Collections.addAll(expectedReads, read1, read2, read3, read4); testReadsInContext(view, shard.getGenomeLocs(), expectedReads); } @Test public void cascadingReadsAtBoundsTest() { - SAMRecord read1 = buildSAMRecord("chr1", 1, 5); - SAMRecord read2 = buildSAMRecord("chr1", 2, 6); - SAMRecord read3 = buildSAMRecord("chr1", 3, 7); - SAMRecord read4 = buildSAMRecord("chr1", 4, 8); - SAMRecord read5 = buildSAMRecord("chr1", 5, 9); - SAMRecord read6 = buildSAMRecord("chr1", 6, 10); + GATKSAMRecord read1 = buildSAMRecord("read1","chr1", 1, 5); + GATKSAMRecord read2 = buildSAMRecord("read2","chr1", 2, 6); + GATKSAMRecord read3 = buildSAMRecord("read3","chr1", 3, 7); + GATKSAMRecord read4 = buildSAMRecord("read4","chr1", 4, 8); + GATKSAMRecord read5 = buildSAMRecord("read5","chr1", 5, 9); + GATKSAMRecord read6 = buildSAMRecord("read6","chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6); Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); @@ -229,25 +230,25 @@ public abstract class LocusViewTemplate extends BaseTest { LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); - List expectedReads = new ArrayList(); + List expectedReads = new ArrayList(); Collections.addAll(expectedReads, read1, read2, read3, read4, read5, read6); testReadsInContext(view, shard.getGenomeLocs(), expectedReads); } @Test public void cascadingReadsOverlappingBoundsTest() { - SAMRecord read01 = buildSAMRecord("chr1", 1, 5); - SAMRecord read02 = buildSAMRecord("chr1", 2, 6); - SAMRecord read03 = buildSAMRecord("chr1", 3, 7); - SAMRecord read04 = buildSAMRecord("chr1", 4, 8); - SAMRecord read05 = buildSAMRecord("chr1", 5, 9); - SAMRecord read06 = buildSAMRecord("chr1", 6, 10); - SAMRecord read07 = buildSAMRecord("chr1", 7, 11); - SAMRecord read08 = buildSAMRecord("chr1", 8, 12); - SAMRecord read09 = buildSAMRecord("chr1", 9, 13); - SAMRecord read10 = buildSAMRecord("chr1", 10, 14); - SAMRecord read11 = buildSAMRecord("chr1", 11, 15); - SAMRecord read12 = buildSAMRecord("chr1", 12, 16); + GATKSAMRecord read01 = buildSAMRecord("read1","chr1", 1, 5); + GATKSAMRecord read02 = buildSAMRecord("read2","chr1", 2, 6); + GATKSAMRecord read03 = buildSAMRecord("read3","chr1", 3, 7); + GATKSAMRecord read04 = buildSAMRecord("read4","chr1", 4, 8); + GATKSAMRecord read05 = buildSAMRecord("read5","chr1", 5, 9); + GATKSAMRecord read06 = buildSAMRecord("read6","chr1", 6, 10); + GATKSAMRecord read07 = buildSAMRecord("read7","chr1", 7, 11); + GATKSAMRecord read08 = buildSAMRecord("read8","chr1", 8, 12); + GATKSAMRecord read09 = buildSAMRecord("read9","chr1", 9, 13); + GATKSAMRecord read10 = buildSAMRecord("read10","chr1", 10, 14); + GATKSAMRecord read11 = buildSAMRecord("read11","chr1", 11, 15); + GATKSAMRecord read12 = buildSAMRecord("read12","chr1", 12, 16); SAMRecordIterator iterator = new SAMRecordIterator(read01, read02, read03, read04, read05, read06, read07, read08, read09, read10, read11, read12); @@ -257,7 +258,7 @@ public abstract class LocusViewTemplate extends BaseTest { LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); - List expectedReads = new ArrayList(); + List expectedReads = new ArrayList(); Collections.addAll(expectedReads, read01, read02, read03, read04, read05, read06, read07, read08, read09, read10, read11, read12); testReadsInContext(view, shard.getGenomeLocs(), expectedReads); @@ -277,7 +278,7 @@ public abstract class LocusViewTemplate extends BaseTest { * @param bounds * @param reads */ - protected abstract void testReadsInContext(LocusView view, List bounds, List reads); + protected abstract void testReadsInContext(LocusView view, List bounds, List reads); /** * Fake a reference sequence file. Essentially, seek a header with a bunch of dummy data. @@ -321,12 +322,13 @@ public abstract class LocusViewTemplate extends BaseTest { * * @return New SAM Record */ - protected SAMRecord buildSAMRecord(String contig, int alignmentStart, int alignmentEnd) { + protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary()); - SAMRecord record = new SAMRecord(header); + GATKSAMRecord record = new GATKSAMRecord(header); + record.setReadName(readName); record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java index 31458f835..564d1e2a3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/GATKWalkerBenchmark.java @@ -25,13 +25,10 @@ package org.broadinstitute.sting.gatk.datasources.reads; import com.google.caliper.Param; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; @@ -41,9 +38,9 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker; import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; -import java.lang.reflect.Field; import java.util.Collections; /** @@ -126,7 +123,7 @@ class CountBasesInReadPerformanceWalker extends ReadWalker { private long Gs; private long Ts; - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { + public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker tracker) { for(byte base: read.getReadBases()) { switch(base) { case 'A': As++; break; diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index 97d01bf0a..c9727d904 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -7,6 +7,7 @@ import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.ReadProperties; @@ -38,8 +39,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { } private final LocusIteratorByState makeLTBS(List reads, ReadProperties readAttributes) { - return new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()), - readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); + return new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); } @Test @@ -212,12 +212,12 @@ public class LocusIteratorByStateUnitTest extends BaseTest { Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect"); if(currentLocus == firstLocus) { - List readsAtLocus = alignmentContext.getBasePileup().getReads(); + List readsAtLocus = alignmentContext.getBasePileup().getReads(); Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus); Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus); } else if(currentLocus == secondLocus) { - List readsAtLocus = alignmentContext.getBasePileup().getReads(); + List readsAtLocus = alignmentContext.getBasePileup().getReads(); Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus); Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus); Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus); @@ -265,7 +265,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { Assert.assertTrue(li.hasNext(),"Missing first locus at " + firstLocus); AlignmentContext alignmentContext = li.next(); Assert.assertEquals(alignmentContext.getLocation().getStart(),firstLocus,"Incorrect locus at this position; should be " + firstLocus); - List readsAtLocus = alignmentContext.getBasePileup().getReads(); + List readsAtLocus = alignmentContext.getBasePileup().getReads(); Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + firstLocus); Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + firstLocus); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java index 5990f1a06..8cd10048a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java @@ -10,6 +10,8 @@ import net.sf.samtools.SAMFileHeader; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; + +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -94,8 +96,8 @@ public class PrintReadsWalkerUnitTest extends BaseTest { walker.out = writer; SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000); - SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); - SAMRecord ret = walker.map(bases, rec,null); + GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); + SAMRecord ret = walker.map(bases, rec, null); assertTrue(ret == rec); assertTrue(ret.getReadName().equals(rec.getReadName())); } diff --git a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java index 4471202c7..bc39d714e 100755 --- a/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/ReadUtilsUnitTest.java @@ -32,7 +32,7 @@ public class ReadUtilsUnitTest extends BaseTest { reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_QUALITY_TAG, REDUCED_READ_COUNTS); } - private void testReadBasesAndQuals(SAMRecord read, int expectedStart, int expectedStop) { + private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) { SAMRecord clipped = ReadUtils.hardClipBases(read, expectedStart, expectedStop - 1, null); String expectedBases = BASES.substring(expectedStart, expectedStop); String expectedQuals = QUALS.substring(expectedStart, expectedStop); diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java index cd0de27f5..f625af23c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -26,9 +26,9 @@ package org.broadinstitute.sting.utils.clipreads; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -44,7 +44,7 @@ public class ReadClipperUnitTest extends BaseTest { // TODO: Add error messages on failed tests - SAMRecord read, expected; + GATKSAMRecord read, expected; ReadClipper readClipper; final static String BASES = "ACTG"; final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63 @@ -65,7 +65,7 @@ public class ReadClipperUnitTest extends BaseTest { logger.warn("Executing testHardClipBothEndsByReferenceCoordinates"); //Clip whole read - Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new SAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new GATKSAMRecord(read.getHeader())); //clip 1 base expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3); Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes()); @@ -79,7 +79,7 @@ public class ReadClipperUnitTest extends BaseTest { logger.warn("Executing testHardClipByReadCoordinates"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new SAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader())); //clip 1 base at start expected = readClipper.hardClipByReadCoordinates(0,0); @@ -112,7 +112,7 @@ public class ReadClipperUnitTest extends BaseTest { logger.warn("Executing testHardClipByReferenceCoordinates"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new SAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader())); //clip 1 base at start expected = readClipper.hardClipByReferenceCoordinates(-1,1); @@ -145,7 +145,7 @@ public class ReadClipperUnitTest extends BaseTest { logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new SAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader())); //clip 1 base at start expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1); @@ -166,7 +166,7 @@ public class ReadClipperUnitTest extends BaseTest { logger.warn("Executing testHardClipByReferenceCoordinatesRightTail"); //Clip whole read - Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new SAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader())); //clip 1 base at end expected = readClipper.hardClipByReferenceCoordinatesRightTail(3); @@ -188,7 +188,7 @@ public class ReadClipperUnitTest extends BaseTest { //Clip whole read - Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new SAMRecord(read.getHeader())); + Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader())); //clip 1 base at start expected = readClipper.hardClipLowQualEnds((byte)34); diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index b4e6ad0ad..cbe580809 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.utils.fragments; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.BeforeTest; import org.testng.annotations.DataProvider; @@ -52,16 +52,16 @@ public class FragmentUtilsUnitTest extends BaseTest { boolean leftIsFirst, boolean leftIsNegative) { super(FragmentUtilsTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); - List pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); - SAMRecord left = pair.get(0); - SAMRecord right = pair.get(1); + List pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + GATKSAMRecord left = pair.get(0); + GATKSAMRecord right = pair.get(1); for ( int pos = leftStart; pos < rightStart + readLen; pos++) { boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd(); boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd(); if ( posCoveredByLeft || posCoveredByRight ) { - List reads = new ArrayList(); + List reads = new ArrayList(); List offsets = new ArrayList(); if ( posCoveredByLeft ) { @@ -89,9 +89,9 @@ public class FragmentUtilsUnitTest extends BaseTest { private class TestState { int expectedSingletons, expectedPairs; ReadBackedPileup pileup; - List rawReads; + List rawReads; - private TestState(final int expectedSingletons, final int expectedPairs, final ReadBackedPileup pileup, final List rawReads) { + private TestState(final int expectedSingletons, final int expectedPairs, final ReadBackedPileup pileup, final List rawReads) { this.expectedSingletons = expectedSingletons; this.expectedPairs = expectedPairs; this.pileup = pileup; @@ -131,7 +131,7 @@ public class FragmentUtilsUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "fragmentUtilsTest") public void testAsListOfReadsFromPileup(FragmentUtilsTest test) { for ( TestState testState : test.statesForPileup ) { - FragmentCollection fp = FragmentUtils.create(testState.pileup.getReads()); + FragmentCollection fp = FragmentUtils.create(testState.pileup.getReads()); Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs); Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons); } @@ -140,7 +140,7 @@ public class FragmentUtilsUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "fragmentUtilsTest") public void testAsListOfReads(FragmentUtilsTest test) { for ( TestState testState : test.statesForReads ) { - FragmentCollection fp = FragmentUtils.create(testState.rawReads); + FragmentCollection fp = FragmentUtils.create(testState.rawReads); Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs); Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons); } @@ -148,10 +148,10 @@ public class FragmentUtilsUnitTest extends BaseTest { @Test(enabled = true, expectedExceptions = IllegalArgumentException.class) public void testOutOfOrder() { - final List pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true); - final SAMRecord left = pair.get(0); - final SAMRecord right = pair.get(1); - final List reads = Arrays.asList(right, left); // OUT OF ORDER! + final List pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true); + final GATKSAMRecord left = pair.get(0); + final GATKSAMRecord right = pair.get(1); + final List reads = Arrays.asList(right, left); // OUT OF ORDER! final List offsets = Arrays.asList(0, 50); final ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets); FragmentUtils.create(pileup); // should throw exception diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java index 564c987e5..6e955289c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.pileup; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; @@ -50,27 +50,25 @@ public class ReadBackedPileupUnitTest { header.addReadGroup(readGroupOne); header.addReadGroup(readGroupTwo); - SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); + GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); read1.setAttribute("RG",readGroupOne.getId()); - SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); + GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); read2.setAttribute("RG",readGroupTwo.getId()); - SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10); + GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10); read3.setAttribute("RG",readGroupOne.getId()); - SAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10); + GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10); read4.setAttribute("RG",readGroupTwo.getId()); - SAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,10); + GATKSAMRecord read5 = ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,10); read5.setAttribute("RG",readGroupTwo.getId()); - SAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header,"read6",0,1,10); + GATKSAMRecord read6 = ArtificialSAMUtils.createArtificialRead(header,"read6",0,1,10); read6.setAttribute("RG",readGroupOne.getId()); - SAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header,"read7",0,1,10); + GATKSAMRecord read7 = ArtificialSAMUtils.createArtificialRead(header,"read7",0,1,10); read7.setAttribute("RG",readGroupOne.getId()); - ReadBackedPileup pileup = new ReadBackedPileupImpl(null, - Arrays.asList(read1,read2,read3,read4,read5,read6,read7), - Arrays.asList(1,1,1,1,1,1,1)); + ReadBackedPileup pileup = new ReadBackedPileupImpl(null, Arrays.asList(read1,read2,read3,read4,read5,read6,read7), Arrays.asList(1,1,1,1,1,1,1)); ReadBackedPileup rg1Pileup = pileup.getPileupForReadGroup("rg1"); - List rg1Reads = rg1Pileup.getReads(); + List rg1Reads = rg1Pileup.getReads(); Assert.assertEquals(rg1Reads.size(), 4, "Wrong number of reads in read group rg1"); Assert.assertEquals(rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't"); Assert.assertEquals(rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't"); @@ -78,7 +76,7 @@ public class ReadBackedPileupUnitTest { Assert.assertEquals(rg1Reads.get(3), read7, "Read " + read7.getReadName() + " should be in rg1 but isn't"); ReadBackedPileup rg2Pileup = pileup.getPileupForReadGroup("rg2"); - List rg2Reads = rg2Pileup.getReads(); + List rg2Reads = rg2Pileup.getReads(); Assert.assertEquals(rg2Reads.size(), 3, "Wrong number of reads in read group rg2"); Assert.assertEquals(rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't"); Assert.assertEquals(rg2Reads.get(1), read4, "Read " + read4.getReadName() + " should be in rg2 but isn't"); @@ -92,16 +90,16 @@ public class ReadBackedPileupUnitTest { public void testSplitByNullReadGroups() { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000); - SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); - SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); - SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10); + GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); + GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); + GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10); ReadBackedPileup pileup = new ReadBackedPileupImpl(null, Arrays.asList(read1,read2,read3), Arrays.asList(1,1,1)); ReadBackedPileup nullRgPileup = pileup.getPileupForReadGroup(null); - List nullRgReads = nullRgPileup.getReads(); + List nullRgReads = nullRgPileup.getReads(); Assert.assertEquals(nullRgPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group"); Assert.assertEquals(nullRgReads.get(0), read1, "Read " + read1.getReadName() + " should be in null rg but isn't"); Assert.assertEquals(nullRgReads.get(1), read2, "Read " + read2.getReadName() + " should be in null rg but isn't"); @@ -125,13 +123,13 @@ public class ReadBackedPileupUnitTest { header.addReadGroup(readGroupOne); header.addReadGroup(readGroupTwo); - SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); + GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); read1.setAttribute("RG",readGroupOne.getId()); - SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); + GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); read2.setAttribute("RG",readGroupTwo.getId()); - SAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10); + GATKSAMRecord read3 = ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,10); read3.setAttribute("RG",readGroupOne.getId()); - SAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10); + GATKSAMRecord read4 = ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,10); read4.setAttribute("RG",readGroupTwo.getId()); ReadBackedPileupImpl sample1Pileup = new ReadBackedPileupImpl(null, @@ -147,14 +145,14 @@ public class ReadBackedPileupUnitTest { ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap); ReadBackedPileup rg1Pileup = compositePileup.getPileupForReadGroup("rg1"); - List rg1Reads = rg1Pileup.getReads(); + List rg1Reads = rg1Pileup.getReads(); Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg1"); Assert.assertEquals(rg1Reads.get(0), read1, "Read " + read1.getReadName() + " should be in rg1 but isn't"); Assert.assertEquals(rg1Reads.get(1), read3, "Read " + read3.getReadName() + " should be in rg1 but isn't"); ReadBackedPileup rg2Pileup = compositePileup.getPileupForReadGroup("rg2"); - List rg2Reads = rg2Pileup.getReads(); + List rg2Reads = rg2Pileup.getReads(); Assert.assertEquals(rg1Reads.size(), 2, "Wrong number of reads in read group rg2"); Assert.assertEquals(rg2Reads.get(0), read2, "Read " + read2.getReadName() + " should be in rg2 but isn't"); @@ -175,9 +173,9 @@ public class ReadBackedPileupUnitTest { header.addReadGroup(readGroupOne); header.addReadGroup(readGroupTwo); - SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); + GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,10); read1.setAttribute("RG",readGroupOne.getId()); - SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); + GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10); read2.setAttribute("RG",readGroupTwo.getId()); Map sampleToPileupMap = new HashMap();