Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-10-25 22:20:53 -04:00
commit be4eb83041
22 changed files with 795 additions and 556 deletions

18
.gitignore vendored 100644
View File

@ -0,0 +1,18 @@
/*.bam
/*.bai
/*.bed
*.idx
*~
/*.vcf
/*.txt
/*.csh
/.*
/*.pdf
/*.eval
*.ipr
*.iws
queueScatterGather
/foo*
/bar*
integrationtests/
public/testdata/onTheFlyOutputTest.vcf

View File

@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File;
import java.lang.reflect.InvocationTargetException;
@ -57,6 +58,8 @@ import java.util.*;
* Converts shards to SAM iterators over the specified region
*/
public class SAMDataSource {
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
/** Backing support for reads. */
protected final ReadProperties readProperties;
@ -644,7 +647,9 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
byte defaultBaseQualities) {
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
if ( useOriginalBaseQualities || defaultBaseQualities >= 0 )
// only wrap if we are replacing the original qualitiies or using a default base quality
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end of throwing away
@ -756,6 +761,7 @@ public class SAMDataSource {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile);
reader.setSAMRecordFactory(factory);
reader.enableFileSource(true);
reader.enableIndexMemoryMapping(false);
if(!enableLowMemorySharding)

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* An iterator which does post-processing of a read, including potentially wrapping
@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists.
*/
public SAMRecord next() {
return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities);
SAMRecord rec = wrappedIterator.next();
// if we are using default quals, check if we need them, and add if necessary.
// 1. we need if reads are lacking or have incomplete quality scores
// 2. we add if defaultBaseQualities has a positive value
if (defaultBaseQualities >= 0) {
byte reads [] = rec.getReadBases();
byte quals [] = rec.getBaseQualities();
if (quals == null || quals.length < reads.length) {
byte new_quals [] = new byte [reads.length];
for (int i=0; i<reads.length; i++)
new_quals[i] = defaultBaseQualities;
rec.setBaseQualities(new_quals);
}
}
// if we are using original quals, set them now if they are present in the record
if ( useOriginalBaseQualities ) {
byte[] originalQuals = rec.getOriginalBaseQualities();
if ( originalQuals != null )
rec.setBaseQualities(originalQuals);
}
return rec;
}
/**

View File

@ -30,12 +30,10 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -403,7 +403,7 @@ public class PairHMMIndelErrorModel {
for (PileupElement p: pileup) {
// > 1 when the read is a consensus read representing multiple independent observations
final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
final boolean isReduced = p.isReducedRead();
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
@ -420,10 +420,6 @@ public class PairHMMIndelErrorModel {
if (read == null)
continue;
if ( isReduced ) {
read = ReadUtils.reducedReadWithReducedQuals(read);
}
if(ReadUtils.is454Read(read)) {
continue;
}

View File

@ -2,9 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
/*
@ -46,6 +49,9 @@ import java.util.List;
*/
public class CycleCovariate implements StandardCovariate {
private final static EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
// Initialize any member variables using the command-line arguments passed to the walkers
public void initialize( final RecalibrationArgumentCollection RAC ) {
if( RAC.DEFAULT_PLATFORM != null ) {
@ -58,122 +64,6 @@ public class CycleCovariate implements StandardCovariate {
}
}
/*
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
int cycle = 1;
//-----------------------------
// ILLUMINA and SOLID
//-----------------------------
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "ILLUMINA" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "SLX" ) || // Some bams have "illumina" and others have "SLX"
read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" )) { // Some bams have "solid" and others have "ABI_SOLID"
cycle = offset + 1;
if( read.getReadNegativeStrandFlag() ) {
cycle = read.getReadLength() - offset;
}
}
//-----------------------------
// 454
//-----------------------------
else if( read.getReadGroup().getPlatform().contains( "454" ) ) { // Some bams have "LS454" and others have just "454"
final byte[] bases = read.getReadBases();
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
if( !read.getReadNegativeStrandFlag() ) { // Forward direction
int iii = 0;
while( iii <= offset )
{
while( iii <= offset && bases[iii] == (byte)'T' ) { iii++; }
while( iii <= offset && bases[iii] == (byte)'A' ) { iii++; }
while( iii <= offset && bases[iii] == (byte)'C' ) { iii++; }
while( iii <= offset && bases[iii] == (byte)'G' ) { iii++; }
if( iii <= offset ) { cycle++; }
if( iii <= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii++; }
}
} else { // Negative direction
int iii = bases.length-1;
while( iii >= offset )
{
while( iii >= offset && bases[iii] == (byte)'T' ) { iii--; }
while( iii >= offset && bases[iii] == (byte)'A' ) { iii--; }
while( iii >= offset && bases[iii] == (byte)'C' ) { iii--; }
while( iii >= offset && bases[iii] == (byte)'G' ) { iii--; }
if( iii >= offset ) { cycle++; }
if( iii >= offset && !BaseUtils.isRegularBase(bases[iii]) ) { iii--; }
}
}
}
//-----------------------------
// SOLID (unused), only to be used in conjunction with PrimerRoundCovariate
//-----------------------------
//else if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) ) {
// // The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
// int pos = offset + 1;
// if( read.getReadNegativeStrandFlag() ) {
// pos = read.getReadLength() - offset;
// }
// cycle = pos / 5; // integer division
//}
//-----------------------------
// UNRECOGNIZED PLATFORM
//-----------------------------
else { // Platform is unrecognized so revert to the default platform but warn the user first
if( defaultPlatform != null) { // The user set a default platform
if( !warnedUserBadPlatform ) {
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"Defaulting to platform = " + defaultPlatform + "." );
}
warnedUserBadPlatform = true;
read.getReadGroup().setPlatform( defaultPlatform );
return getValue( read, offset ); // A recursive call
} else { // The user did not set a default platform
throw new StingException( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"No default platform specified. Users must set the default platform using the --default_platform <String> argument." );
}
}
// Differentiate between first and second of pair.
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
// to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
// Therefore the cycle covariate must differentiate between first and second of pair reads.
// This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
// the current sequential model would consider the effects independently instead of jointly.
if( read.getReadPairedFlag() && read.getSecondOfPairFlag() ) {
cycle *= -1;
}
return cycle;
}
*/
// todo -- this should be put into a common place in the code base
private static List<String> ILLUMINA_NAMES = Arrays.asList("ILLUMINA", "SLX", "SOLEXA");
private static List<String> SOLID_NAMES = Arrays.asList("SOLID");
private static List<String> LS454_NAMES = Arrays.asList("454");
private static List<String> COMPLETE_GENOMICS_NAMES = Arrays.asList("COMPLETE");
private static List<String> PACBIO_NAMES = Arrays.asList("PACBIO");
private static List<String> ION_TORRENT_NAMES = Arrays.asList("IONTORRENT");
private static boolean isPlatform(SAMRecord read, List<String> names) {
String pl = read.getReadGroup().getPlatform().toUpperCase();
for ( String name : names )
if ( pl.contains( name ) )
return true;
return false;
}
// Used to pick out the covariate's value from attributes of the read
public void getValues(SAMRecord read, Comparable[] comparable) {
@ -181,7 +71,8 @@ public class CycleCovariate implements StandardCovariate {
// Illumina, Solid, PacBio, and Complete Genomics
//-----------------------------
if( isPlatform(read, ILLUMINA_NAMES) || isPlatform(read, SOLID_NAMES) || isPlatform(read, PACBIO_NAMES) || isPlatform(read, COMPLETE_GENOMICS_NAMES) ) {
final NGSPlatform ngsPlatform = ((GATKSAMRecord)read).getNGSPlatform();
if( DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform) ) {
final int init;
final int increment;
if( !read.getReadNegativeStrandFlag() ) {
@ -227,8 +118,7 @@ public class CycleCovariate implements StandardCovariate {
//-----------------------------
// 454 and Ion Torrent
//-----------------------------
else if ( isPlatform(read, LS454_NAMES) || isPlatform(read, ION_TORRENT_NAMES)) { // Some bams have "LS454" and others have just "454"
else if( FLOW_CYCLE_PLATFORMS.contains(ngsPlatform) ) {
final int readLength = read.getReadLength();
final byte[] bases = read.getReadBases();
@ -273,8 +163,6 @@ public class CycleCovariate implements StandardCovariate {
else {
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
}
}
// Used to get the covariate's value from input csv file in TableRecalibrationWalker

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
@ -228,8 +229,7 @@ public class RecalDataManager {
* @param RAC The list of shared command line arguments
*/
public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
SAMReadGroupRecord readGroup = read.getReadGroup();
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord)read).getReadGroup();
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
if( readGroup == null ) {
@ -241,7 +241,7 @@ public class RecalDataManager {
warnUserNullReadGroup = true;
}
// There is no readGroup so defaulting to these values
readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
readGroup = new GATKSAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
((GATKSAMRecord)read).setReadGroup( readGroup );
} else {
@ -251,7 +251,7 @@ public class RecalDataManager {
if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user
final String oldPlatform = readGroup.getPlatform();
readGroup = new SAMReadGroupRecord( RAC.FORCE_READ_GROUP );
readGroup = new GATKSAMReadGroupRecord( RAC.FORCE_READ_GROUP );
readGroup.setPlatform( oldPlatform );
((GATKSAMRecord)read).setReadGroup( readGroup );
}

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
/**
* A canonical, master list of the standard NGS platforms. These values
* can be obtained (efficiently) from a GATKSAMRecord object with the
* getNGSPlatform method.
*
* @author Mark DePristo
* @since 2011
*/
public enum NGSPlatform {
ILLUMINA("ILLUMINA", "SLX", "SOLEXA"),
SOLID("SOLID"),
LS454("454"),
COMPLETE_GENOMICS("COMPLETE"),
PACBIO("PACBIO"),
ION_TORRENT("IONTORRENT"),
UNKNOWN("UNKNOWN");
/**
* Array of the prefix names in a BAM file for each of the platforms.
*/
private final String[] BAM_PL_NAMES;
NGSPlatform(final String... BAM_PL_NAMES) {
for ( int i = 0; i < BAM_PL_NAMES.length; i++ )
BAM_PL_NAMES[i] = BAM_PL_NAMES[i].toUpperCase();
this.BAM_PL_NAMES = BAM_PL_NAMES;
}
/**
* Returns a representative PL string for this platform
* @return
*/
public final String getDefaultPlatform() {
return BAM_PL_NAMES[0];
}
/**
* Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord.
* Note you should not use this function if you have a GATKSAMRecord -- use the
* accessor method instead.
*
* @param read
* @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
*/
public static final NGSPlatform fromRead(SAMRecord read) {
return fromReadGroup(read.getReadGroup());
}
/**
* Returns the NGSPlatform corresponding to the PL tag in the read group
* @param rg
* @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
*/
public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) {
return fromReadGroupPL(rg.getPlatform());
}
/**
* Returns the NGSPlatform corresponding to the PL tag in the read group
* @param plFromRG -- the PL field (or equivalent) in a ReadGroup object
* @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
*/
public static final NGSPlatform fromReadGroupPL(final String plFromRG) {
if ( plFromRG == null ) return UNKNOWN;
// todo -- algorithm could be implemented more efficiently, as the list of all
// todo -- names is known upfront, so a decision tree could be used to identify
// todo -- a prefix common to PL
final String pl = plFromRG.toUpperCase();
for ( final NGSPlatform ngsPlatform : NGSPlatform.values() ) {
for ( final String bamPLName : ngsPlatform.BAM_PL_NAMES ) {
if ( pl.contains(bamPLName) )
return ngsPlatform;
}
}
return UNKNOWN;
}
}

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.utils.pileup;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
/**
* An easy to access fragment-based pileup, which contains two separate pileups. The first
@ -13,31 +14,51 @@ import java.util.Map;
*
* Based on the original code by E. Banks
*
* TODO -- technically we could generalize this code to support a pseudo-duplicate marking
* TODO -- algorithm that could collect all duplicates into a single super pileup element
* Oct 21: note that the order of the oneReadPileup and twoReadPileups are not
* defined. The algorithms that produce these lists are in fact producing
* lists of Pileup elements *NOT* sorted by alignment start position of the underlying
* reads.
*
* User: depristo
* Date: 3/26/11
* Time: 10:09 PM
*/
public class FragmentPileup {
final Collection<PileupElement> oneReadPile;
final Collection<TwoReadPileupElement> twoReadPile = new ArrayList<TwoReadPileupElement>();
Collection<PileupElement> oneReadPile = null;
Collection<TwoReadPileupElement> twoReadPile = null;
protected enum FragmentMatchingAlgorithm {
ORIGINAL,
skipNonOverlapping,
}
/**
* Create a new Fragment-based pileup from the standard read-based pileup
* @param pileup
*/
public FragmentPileup(ReadBackedPileup pileup) {
Map<String, PileupElement> nameMap = new HashMap<String, PileupElement>();
skipNonOverlapping(pileup);
}
/** For performance testing only */
protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) {
switch ( algorithm ) {
case ORIGINAL: oldSlowCalculation(pileup); break;
case skipNonOverlapping: skipNonOverlapping(pileup); break;
}
}
private final void oldSlowCalculation(final ReadBackedPileup pileup) {
final Map<String, PileupElement> nameMap = new HashMap<String, PileupElement>(pileup.size());
// build an initial map, grabbing all of the multi-read fragments
for ( PileupElement p : pileup ) {
String readName = p.getRead().getReadName();
for ( final PileupElement p : pileup ) {
final String readName = p.getRead().getReadName();
PileupElement pe1 = nameMap.get(readName);
final PileupElement pe1 = nameMap.get(readName);
if ( pe1 != null ) {
// assumes we have at most 2 reads per fragment
if ( twoReadPile == null ) twoReadPile = new ArrayList<TwoReadPileupElement>();
twoReadPile.add(new TwoReadPileupElement(pe1, p));
nameMap.remove(readName);
} else {
@ -45,17 +66,54 @@ public class FragmentPileup {
}
}
// now set the one Read pile to the values in the nameMap with only a single read
oneReadPile = nameMap.values();
}
private final void skipNonOverlapping(final ReadBackedPileup pileup) {
Map<String, PileupElement> nameMap = null;
// build an initial map, grabbing all of the multi-read fragments
for ( final PileupElement p : pileup ) {
final SAMRecord read = p.getRead();
final int mateStart = read.getMateAlignmentStart();
if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) {
// if we know that this read won't overlap its mate, or doesn't have one, jump out early
if ( oneReadPile == null ) oneReadPile = new ArrayList<PileupElement>(pileup.size()); // lazy init
oneReadPile.add(p);
} else {
// the read might overlap it's mate, or is the rightmost read of a pair
final String readName = p.getRead().getReadName();
final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName);
if ( pe1 != null ) {
// assumes we have at most 2 reads per fragment
if ( twoReadPile == null ) twoReadPile = new ArrayList<TwoReadPileupElement>(); // lazy init
twoReadPile.add(new TwoReadPileupElement(pe1, p));
nameMap.remove(readName);
} else {
if ( nameMap == null ) nameMap = new HashMap<String, PileupElement>(pileup.size()); // lazy init
nameMap.put(readName, p);
}
}
}
// add all of the reads that are potentially overlapping but whose mate never showed
// up to the oneReadPile
if ( nameMap != null && ! nameMap.isEmpty() ) {
if ( oneReadPile == null )
oneReadPile = nameMap.values();
else
oneReadPile.addAll(nameMap.values());
}
}
/**
* Gets the pileup elements containing two reads, in no particular order
*
* @return
*/
public Collection<TwoReadPileupElement> getTwoReadPileup() {
return twoReadPile;
return twoReadPile == null ? Collections.<TwoReadPileupElement>emptyList() : twoReadPile;
}
/**
@ -64,7 +122,7 @@ public class FragmentPileup {
* @return
*/
public Collection<PileupElement> getOneReadPileup() {
return oneReadPile;
return oneReadPile == null ? Collections.<PileupElement>emptyList() : oneReadPile;
}
/**

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
/**
@ -12,7 +13,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* Date: Apr 14, 2009
* Time: 8:54:05 AM
*/
public class PileupElement {
public class PileupElement implements Comparable<PileupElement> {
public static final byte DELETION_BASE = BaseUtils.D;
public static final byte DELETION_QUAL = (byte) 16;
public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87;
@ -75,6 +76,20 @@ public class PileupElement {
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
}
@Override
public int compareTo(final PileupElement pileupElement) {
if ( offset < pileupElement.offset )
return -1;
else if ( offset > pileupElement.offset )
return 1;
else if ( read.getAlignmentStart() < pileupElement.read.getAlignmentStart() )
return -1;
else if ( read.getAlignmentStart() > pileupElement.read.getAlignmentStart() )
return 1;
else
return 0;
}
// --------------------------------------------------------------------------
//
// Reduced read accessors
@ -82,16 +97,16 @@ public class PileupElement {
// --------------------------------------------------------------------------
public boolean isReducedRead() {
return ReadUtils.isReducedRead(getRead());
return ((GATKSAMRecord)read).isReducedRead();
}
public int getReducedCount() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName());
return ReadUtils.getReducedCount(getRead(), offset);
return ((GATKSAMRecord)read).getReducedCount(offset);
}
public byte getReducedQual() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName());
return ReadUtils.getReducedQual(getRead(), offset);
return getQual();
}
}

View File

@ -2,11 +2,15 @@ package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* @author aaron
@ -29,7 +33,7 @@ public class ArtificialSAMUtils {
File outFile = new File(filename);
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, true, outFile);
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
for (int readNumber = 1; readNumber < readsPerChomosome; readNumber++) {
out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, DEFAULT_READ_LENGTH));
@ -134,6 +138,7 @@ public class ArtificialSAMUtils {
/**
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
*
*
* @param header the SAM header to associate the read with
* @param name the name of the read
* @param refIndex the reference index, i.e. what chromosome to associate it with
@ -142,11 +147,11 @@ public class ArtificialSAMUtils {
*
* @return the artificial read
*/
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) ||
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) )
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) )
throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart);
SAMRecord record = new SAMRecord(header);
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(name);
record.setReferenceIndex(refIndex);
record.setAlignmentStart(alignmentStart);
@ -166,6 +171,7 @@ public class ArtificialSAMUtils {
if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
record.setReadUnmappedFlag(true);
}
return record;
}
@ -181,19 +187,51 @@ public class ArtificialSAMUtils {
*
* @return the artificial read
*/
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
if (bases.length != qual.length) {
throw new ReviewedStingException("Passed in read string is different length then the quality array");
}
SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
rec.setReadBases(bases);
rec.setBaseQualities(qual);
if (refIndex == -1) {
rec.setReadUnmappedFlag(true);
}
return rec;
}
public final static List<SAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
left.setReadPairedFlag(true);
right.setReadPairedFlag(true);
left.setProperPairFlag(true);
right.setProperPairFlag(true);
left.setFirstOfPairFlag(leftIsFirst);
right.setFirstOfPairFlag(! leftIsFirst);
left.setReadNegativeStrandFlag(leftIsNegative);
left.setMateNegativeStrandFlag(!leftIsNegative);
right.setReadNegativeStrandFlag(!leftIsNegative);
right.setMateNegativeStrandFlag(leftIsNegative);
left.setMateAlignmentStart(right.getAlignmentStart());
right.setMateAlignmentStart(left.getAlignmentStart());
left.setMateReferenceIndex(0);
right.setMateReferenceIndex(0);
int isize = rightStart + readLen - leftStart;
left.setInferredInsertSize(isize);
right.setInferredInsertSize(-isize);
return Arrays.asList(left, right);
}
/**
* create an iterator containing the specified read piles
*
@ -255,4 +293,52 @@ public class ArtificialSAMUtils {
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
}
private final static int ranIntInclusive(Random ran, int start, int stop) {
final int range = stop - start;
return ran.nextInt(range) + start;
}
/**
* Creates a read backed pileup containing up to pileupSize reads at refID 0 from header at loc with
* reads created that have readLen bases. Pairs are sampled from a gaussian distribution with mean insert
* size of insertSize and variation of insertSize / 10. The first read will be in the pileup, and the second
* may be, depending on where this sampled insertSize puts it.
* @param header
* @param loc
* @param readLen
* @param insertSize
* @param pileupSize
* @return
*/
public static ReadBackedPileup createReadBackedPileup(final SAMFileHeader header, final GenomeLoc loc, final int readLen, final int insertSize, final int pileupSize) {
final Random ran = new Random();
final boolean leftIsFirst = true;
final boolean leftIsNegative = false;
final int insertSizeVariation = insertSize / 10;
final int pos = loc.getStart();
final List<PileupElement> pileupElements = new ArrayList<PileupElement>();
for ( int i = 0; i < pileupSize / 2; i++ ) {
final String readName = "read" + i;
final int leftStart = ranIntInclusive(ran, 1, pos);
final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize);
final int rightStart = leftStart + fragmentSize - readLen;
if ( rightStart <= 0 ) continue;
List<SAMRecord> pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
final SAMRecord left = pair.get(0);
final SAMRecord right = pair.get(1);
pileupElements.add(new PileupElement(left, pos - leftStart));
if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) {
pileupElements.add(new PileupElement(right, pos - rightStart));
}
}
Collections.sort(pileupElements);
return new ReadBackedPileupImpl(loc, pileupElements);
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.utils.NGSPlatform;
/**
* @author ebanks
@ -15,16 +16,28 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
// the SAMReadGroupRecord data we're caching
private String mSample = null;
private String mPlatform = null;
private NGSPlatform mNGSPlatform = null;
// because some values can be null, we don't want to duplicate effort
private boolean retrievedSample = false;
private boolean retrievedPlatform = false;
private boolean retrievedNGSPlatform = false;
public GATKSAMReadGroupRecord(final String id) {
super(id);
}
public GATKSAMReadGroupRecord(SAMReadGroupRecord record) {
super(record.getReadGroupId(), record);
}
public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) {
super(record.getReadGroupId(), record);
setPlatform(pl.getDefaultPlatform());
mNGSPlatform = pl;
retrievedPlatform = retrievedNGSPlatform = true;
}
///////////////////////////////////////////////////////////////////////////////
// *** The following methods are overloaded to cache the appropriate data ***//
///////////////////////////////////////////////////////////////////////////////
@ -55,5 +68,15 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
super.setPlatform(s);
mPlatform = s;
retrievedPlatform = true;
retrievedNGSPlatform = false; // recalculate the NGSPlatform
}
public NGSPlatform getNGSPlatform() {
if ( ! retrievedNGSPlatform ) {
mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform());
retrievedNGSPlatform = true;
}
return mNGSPlatform;
}
}

View File

@ -1,49 +1,56 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.NGSPlatform;
import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @author ebanks
* @author ebanks, depristo
* GATKSAMRecord
*
* this class extends the samtools SAMRecord class and caches important
* this class extends the samtools BAMRecord class (and SAMRecord) and caches important
* (and oft-accessed) data that's not already cached by the SAMRecord class
*
* IMPORTANT NOTE: Because ReadGroups are not set through the SAMRecord,
* if they are ever modified externally then one must also invoke the
* setReadGroup() method here to ensure that the cache is kept up-to-date.
*
* 13 Oct 2010 - mhanna - this class is fundamentally flawed: it uses a decorator
* pattern to wrap a heavyweight object, which can lead
* to heinous side effects if the wrapping is not carefully
* done. Hopefully SAMRecord will become an interface and
* this will eventually be fixed.
*/
public class GATKSAMRecord extends SAMRecord {
// the underlying SAMRecord which we are wrapping
private final SAMRecord mRecord;
public class GATKSAMRecord extends BAMRecord {
// the SAMRecord data we're caching
private String mReadString = null;
private SAMReadGroupRecord mReadGroup = null;
private boolean mNegativeStrandFlag;
private boolean mUnmappedFlag;
private Boolean mSecondOfPairFlag = null;
private GATKSAMReadGroupRecord mReadGroup = null;
private byte[] reducedReadCounts = null;
// because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false;
/** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */
private boolean lookedUpReducedReadQuality = false;
private Integer reducedReadQuality;
private boolean retrievedReduceReadCounts = false;
// These temporary attributes were added here to make life easier for
// certain algorithms by providing a way to label or attach arbitrary data to
@ -51,101 +58,112 @@ public class GATKSAMRecord extends SAMRecord {
// These attributes exist in memory only, and are never written to disk.
private Map<Object, Object> temporaryAttributes;
public GATKSAMRecord(SAMRecord record, boolean useOriginalBaseQualities, byte defaultBaseQualities) {
super(null); // it doesn't matter - this isn't used
if ( record == null )
throw new IllegalArgumentException("The SAMRecord argument cannot be null");
mRecord = record;
/**
* HACK TO CREATE GATKSAMRECORD WITH ONLY A HEADER FOR TESTING PURPOSES ONLY
* @param header
*/
public GATKSAMRecord(final SAMFileHeader header) {
this(new SAMRecord(header));
}
mNegativeStrandFlag = mRecord.getReadNegativeStrandFlag();
mUnmappedFlag = mRecord.getReadUnmappedFlag();
/**
* HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY
* @param read
*/
public GATKSAMRecord(final SAMRecord read) {
super(read.getHeader(), read.getMateReferenceIndex(),
read.getAlignmentStart(),
read.getReadName() != null ? (short)read.getReadNameLength() : 0,
(short)read.getMappingQuality(),
0,
read.getCigarLength(),
read.getFlags(),
read.getReadLength(),
read.getMateReferenceIndex(),
read.getMateAlignmentStart(),
read.getInferredInsertSize(),
new byte[]{});
super.clearAttributes();
}
// because attribute methods are declared to be final (and we can't overload them),
// we need to actually set all of the attributes here
List<SAMTagAndValue> attributes = record.getAttributes();
for ( SAMTagAndValue attribute : attributes )
setAttribute(attribute.tag, attribute.value);
// if we are using default quals, check if we need them, and add if necessary.
// 1. we need if reads are lacking or have incomplete quality scores
// 2. we add if defaultBaseQualities has a positive value
if (defaultBaseQualities >= 0) {
byte reads [] = record.getReadBases();
byte quals [] = record.getBaseQualities();
if (quals == null || quals.length < reads.length) {
byte new_quals [] = new byte [reads.length];
for (int i=0; i<reads.length; i++)
new_quals[i] = defaultBaseQualities;
record.setBaseQualities(new_quals);
}
}
// if we are using original quals, set them now if they are present in the record
if ( useOriginalBaseQualities ) {
byte[] originalQuals = mRecord.getOriginalBaseQualities();
if ( originalQuals != null )
mRecord.setBaseQualities(originalQuals);
}
public GATKSAMRecord(final SAMFileHeader header,
final int referenceSequenceIndex,
final int alignmentStart,
final short readNameLength,
final short mappingQuality,
final int indexingBin,
final int cigarLen,
final int flags,
final int readLen,
final int mateReferenceSequenceIndex,
final int mateAlignmentStart,
final int insertSize,
final byte[] variableLengthBlock) {
super(header, referenceSequenceIndex, alignmentStart, readNameLength, mappingQuality, indexingBin, cigarLen,
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
}
///////////////////////////////////////////////////////////////////////////////
// *** The following methods are overloaded to cache the appropriate data ***//
///////////////////////////////////////////////////////////////////////////////
@Override
public String getReadString() {
if ( mReadString == null )
mReadString = mRecord.getReadString();
mReadString = super.getReadString();
return mReadString;
}
@Override
public void setReadString(String s) {
mRecord.setReadString(s);
super.setReadString(s);
mReadString = s;
}
public SAMReadGroupRecord getReadGroup() {
@Override
public GATKSAMReadGroupRecord getReadGroup() {
if ( !retrievedReadGroup ) {
SAMReadGroupRecord tempReadGroup = mRecord.getReadGroup();
mReadGroup = (tempReadGroup == null ? tempReadGroup : new GATKSAMReadGroupRecord(tempReadGroup));
SAMReadGroupRecord tempReadGroup = super.getReadGroup();
mReadGroup = (tempReadGroup == null ? null : new GATKSAMReadGroupRecord(tempReadGroup));
retrievedReadGroup = true;
}
return mReadGroup;
}
public void setReadGroup(SAMReadGroupRecord record) {
mReadGroup = record;
/**
* Efficient caching accessor that returns the GATK NGSPlatform of this read
* @return
*/
public NGSPlatform getNGSPlatform() {
return getReadGroup().getNGSPlatform();
}
public boolean getReadUnmappedFlag() {
return mUnmappedFlag;
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
mReadGroup = readGroup;
retrievedReadGroup = true;
}
public void setReadUnmappedFlag(boolean b) {
mRecord.setReadUnmappedFlag(b);
mUnmappedFlag = b;
}
//
//
// Reduced read functions
//
//
public boolean getReadNegativeStrandFlag() {
return mNegativeStrandFlag;
}
public void setReadNegativeStrandFlag(boolean b) {
mRecord.setReadNegativeStrandFlag(b);
mNegativeStrandFlag = b;
}
public boolean getSecondOfPairFlag() {
if( mSecondOfPairFlag == null ) {
//not done in constructor because this method can't be called for
//all SAMRecords.
mSecondOfPairFlag = mRecord.getSecondOfPairFlag();
public byte[] getReducedReadCounts() {
if ( ! retrievedReduceReadCounts ) {
reducedReadCounts = getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
retrievedReduceReadCounts = true;
}
return mSecondOfPairFlag;
return reducedReadCounts;
}
public void setSecondOfPairFlag(boolean b) {
mRecord.setSecondOfPairFlag(b);
mSecondOfPairFlag = b;
public boolean isReducedRead() {
return getReducedReadCounts() != null;
}
public final byte getReducedCount(final int i) {
return getReducedReadCounts()[i];
}
/**
@ -201,269 +219,18 @@ public class GATKSAMRecord extends SAMRecord {
return null;
}
/**
* Removes the attribute that has the given key.
*
* Temporary attributes provide a way to label or attach arbitrary data to
* individual GATKSAMRecords. These attributes exist in memory only,
* and are never written to disk.
*
* @param key key
* @return The value that was associated with this key, or null.
*/
public Object removeTemporaryAttribute(Object key) {
if(temporaryAttributes != null) {
return temporaryAttributes.remove(key);
}
return null;
@Override
public int hashCode() {
return super.hashCode();
}
/////////////////////////////////////////////////////////////////////////////////
// *** The following methods just call the appropriate method in the record ***//
/////////////////////////////////////////////////////////////////////////////////
public String getReadName() { return mRecord.getReadName(); }
public int getReadNameLength() { return mRecord.getReadNameLength(); }
public void setReadName(String s) { mRecord.setReadName(s); }
public byte[] getReadBases() { return mRecord.getReadBases(); }
public void setReadBases(byte[] bytes) { mRecord.setReadBases(bytes); }
public int getReadLength() { return mRecord.getReadLength(); }
public byte[] getBaseQualities() { return mRecord.getBaseQualities(); }
public void setBaseQualities(byte[] bytes) { mRecord.setBaseQualities(bytes); }
public String getBaseQualityString() { return mRecord.getBaseQualityString(); }
public void setBaseQualityString(String s) { mRecord.setBaseQualityString(s); }
public byte[] getOriginalBaseQualities() { return mRecord.getOriginalBaseQualities(); }
public void setOriginalBaseQualities(byte[] bytes) { mRecord.setOriginalBaseQualities(bytes); }
public String getReferenceName() { return mRecord.getReferenceName(); }
public void setReferenceName(String s) { mRecord.setReferenceName(s); }
public Integer getReferenceIndex() { return mRecord.getReferenceIndex(); }
public void setReferenceIndex(int i) { mRecord.setReferenceIndex(i); }
public String getMateReferenceName() { return mRecord.getMateReferenceName(); }
public void setMateReferenceName(String s) { mRecord.setMateReferenceName(s); }
public Integer getMateReferenceIndex() { return mRecord.getMateReferenceIndex(); }
public void setMateReferenceIndex(int i) { mRecord.setMateReferenceIndex(i); }
public int getAlignmentStart() { return mRecord.getAlignmentStart(); }
public void setAlignmentStart(int i) { mRecord.setAlignmentStart(i); }
public int getAlignmentEnd() { return mRecord.getAlignmentEnd(); }
public int getUnclippedStart() { return mRecord.getUnclippedStart(); }
public int getUnclippedEnd() { return mRecord.getUnclippedEnd(); }
public void setAlignmentEnd(int i) { mRecord.setAlignmentEnd(i); }
public int getMateAlignmentStart() { return mRecord.getMateAlignmentStart(); }
public void setMateAlignmentStart(int i) { mRecord.setMateAlignmentStart(i); }
public int getInferredInsertSize() { return mRecord.getInferredInsertSize(); }
public void setInferredInsertSize(int i) { mRecord.setInferredInsertSize(i); }
public int getMappingQuality() { return mRecord.getMappingQuality(); }
public void setMappingQuality(int i) { mRecord.setMappingQuality(i); }
public String getCigarString() { return mRecord.getCigarString(); }
public void setCigarString(String s) { mRecord.setCigarString(s); }
public Cigar getCigar() { return mRecord.getCigar(); }
public int getCigarLength() { return mRecord.getCigarLength(); }
public void setCigar(Cigar cigar) { mRecord.setCigar(cigar); }
public int getFlags() { return mRecord.getFlags(); }
public void setFlags(int i) { mRecord.setFlags(i); }
public boolean getReadPairedFlag() { return mRecord.getReadPairedFlag(); }
public boolean getProperPairFlag() { return mRecord.getProperPairFlag(); }
public boolean getMateUnmappedFlag() { return mRecord.getMateUnmappedFlag(); }
public boolean getMateNegativeStrandFlag() { return mRecord.getMateNegativeStrandFlag(); }
public boolean getFirstOfPairFlag() { return mRecord.getFirstOfPairFlag(); }
public boolean getNotPrimaryAlignmentFlag() { return mRecord.getNotPrimaryAlignmentFlag(); }
public boolean getReadFailsVendorQualityCheckFlag() { return mRecord.getReadFailsVendorQualityCheckFlag(); }
public boolean getDuplicateReadFlag() { return mRecord.getDuplicateReadFlag(); }
public void setReadPairedFlag(boolean b) { mRecord.setReadPairedFlag(b); }
public void setProperPairFlag(boolean b) { mRecord.setProperPairFlag(b); }
public void setMateUnmappedFlag(boolean b) { mRecord.setMateUnmappedFlag(b); }
public void setMateNegativeStrandFlag(boolean b) { mRecord.setMateNegativeStrandFlag(b); }
public void setFirstOfPairFlag(boolean b) { mRecord.setFirstOfPairFlag(b); }
public void setNotPrimaryAlignmentFlag(boolean b) { mRecord.setNotPrimaryAlignmentFlag(b); }
public void setReadFailsVendorQualityCheckFlag(boolean b) { mRecord.setReadFailsVendorQualityCheckFlag(b); }
public void setDuplicateReadFlag(boolean b) { mRecord.setDuplicateReadFlag(b); }
public net.sf.samtools.SAMFileReader.ValidationStringency getValidationStringency() { return mRecord.getValidationStringency(); }
public void setValidationStringency(net.sf.samtools.SAMFileReader.ValidationStringency validationStringency) { mRecord.setValidationStringency(validationStringency); }
public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); }
public Integer getIntegerAttribute(final String tag) {
if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) {
if ( ! lookedUpReducedReadQuality ) {
lookedUpReducedReadQuality = true;
reducedReadQuality = mRecord.getIntegerAttribute(tag);
}
return reducedReadQuality;
} else {
return mRecord.getIntegerAttribute(tag);
}
}
public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); }
public Byte getByteAttribute(final String tag) { return mRecord.getByteAttribute(tag); }
public String getStringAttribute(final String tag) { return mRecord.getStringAttribute(tag); }
public Character getCharacterAttribute(final String tag) { return mRecord.getCharacterAttribute(tag); }
public Float getFloatAttribute(final String tag) { return mRecord.getFloatAttribute(tag); }
public byte[] getByteArrayAttribute(final String tag) { return mRecord.getByteArrayAttribute(tag); }
protected Object getAttribute(final short tag) {
Object attribute;
try {
Method method = mRecord.getClass().getDeclaredMethod("getAttribute",Short.TYPE);
method.setAccessible(true);
attribute = method.invoke(mRecord,tag);
}
catch(Exception ex) {
throw new ReviewedStingException("Unable to invoke getAttribute method",ex);
}
return attribute;
}
public void setAttribute(final String tag, final Object value) { mRecord.setAttribute(tag,value); }
protected void setAttribute(final short tag, final Object value) {
try {
Method method = mRecord.getClass().getDeclaredMethod("setAttribute",Short.TYPE,Object.class);
method.setAccessible(true);
method.invoke(mRecord,tag,value);
}
catch(Exception ex) {
throw new ReviewedStingException("Unable to invoke setAttribute method",ex);
}
}
public void clearAttributes() { mRecord.clearAttributes(); }
protected void setAttributes(final SAMBinaryTagAndValue attributes) {
try {
Method method = mRecord.getClass().getDeclaredMethod("setAttributes",SAMBinaryTagAndValue.class);
method.setAccessible(true);
method.invoke(mRecord,attributes);
}
catch(Exception ex) {
throw new ReviewedStingException("Unable to invoke setAttributes method",ex);
}
}
protected SAMBinaryTagAndValue getBinaryAttributes() {
SAMBinaryTagAndValue binaryAttributes;
try {
Method method = mRecord.getClass().getDeclaredMethod("getBinaryAttributes");
method.setAccessible(true);
binaryAttributes = (SAMBinaryTagAndValue)method.invoke(mRecord);
}
catch(Exception ex) {
throw new ReviewedStingException("Unable to invoke getBinaryAttributes method",ex);
}
return binaryAttributes;
}
public List<SAMTagAndValue> getAttributes() { return mRecord.getAttributes(); }
public SAMFileHeader getHeader() { return mRecord.getHeader(); }
public void setHeader(SAMFileHeader samFileHeader) { mRecord.setHeader(samFileHeader); }
public byte[] getVariableBinaryRepresentation() { return mRecord.getVariableBinaryRepresentation(); }
public int getAttributesBinarySize() { return mRecord.getAttributesBinarySize(); }
public String format() { return mRecord.format(); }
public List<AlignmentBlock> getAlignmentBlocks() { return mRecord.getAlignmentBlocks(); }
public List<SAMValidationError> validateCigar(long l) { return mRecord.validateCigar(l); }
@Override
public boolean equals(Object o) {
if (this == o) return true;
// note -- this forbids a GATKSAMRecord being equal to its underlying SAMRecord
if (!(o instanceof GATKSAMRecord)) return false;
// note that we do not consider the GATKSAMRecord internal state at all
return mRecord.equals(((GATKSAMRecord)o).mRecord);
}
public int hashCode() { return mRecord.hashCode(); }
public List<SAMValidationError> isValid() { return mRecord.isValid(); }
public Object clone() throws CloneNotSupportedException { return mRecord.clone(); }
public String toString() { return mRecord.toString(); }
public SAMFileSource getFileSource() { return mRecord.getFileSource(); }
/**
* Sets a marker providing the source reader for this file and the position in the file from which the read originated.
* @param fileSource source of the given file.
*/
@Override
protected void setFileSource(final SAMFileSource fileSource) {
try {
Method method = SAMRecord.class.getDeclaredMethod("setFileSource",SAMFileSource.class);
method.setAccessible(true);
method.invoke(mRecord,fileSource);
}
catch(Exception ex) {
throw new ReviewedStingException("Unable to invoke setFileSource method",ex);
}
return super.equals(o);
}
}

View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordFactory;
import net.sf.samtools.BAMRecord;
import org.broadinstitute.sting.utils.exceptions.UserException;
/**
* Factory interface implementation used to create GATKSamRecords
* from SAMFileReaders with SAM-JDK
*
* @author Mark DePristo
*/
public class GATKSamRecordFactory implements SAMRecordFactory {
/** Create a new SAMRecord to be filled in */
public SAMRecord createSAMRecord(SAMFileHeader header) {
throw new UserException.BadInput("The GATK now longer supports input SAM files");
}
/** Create a new BAM Record. */
public BAMRecord createBAMRecord(final SAMFileHeader header,
final int referenceSequenceIndex,
final int alignmentStart,
final short readNameLength,
final short mappingQuality,
final int indexingBin,
final int cigarLen,
final int flags,
final int readLen,
final int mateReferenceSequenceIndex,
final int mateAlignmentStart,
final int insertSize,
final byte[] variableLengthBlock) {
return new GATKSAMRecord(header,
referenceSequenceIndex,
alignmentStart,
readNameLength,
mappingQuality,
indexingBin,
cigarLen,
flags,
readLen,
mateReferenceSequenceIndex,
mateAlignmentStart,
insertSize,
variableLengthBlock);
}
}

View File

@ -52,38 +52,6 @@ public class ReadUtils {
// ----------------------------------------------------------------------------------------------------
public static final String REDUCED_READ_QUALITY_TAG = "RQ";
public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC";
public final static byte[] getReducedReadQualityTagValue(final SAMRecord read) {
return read.getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
}
public final static boolean isReducedRead(final SAMRecord read) {
return getReducedReadQualityTagValue(read) != null;
}
public final static byte getReducedQual(final SAMRecord read, final int i) {
return read.getBaseQualities()[i];
}
public final static byte getReducedCount(final SAMRecord read, final int i) {
return getReducedReadQualityTagValue(read)[i];
}
public final static SAMRecord reducedReadWithReducedQuals(final SAMRecord read) {
if ( ! isReducedRead(read) ) throw new IllegalArgumentException("read must be a reduced read");
return read;
// try {
// SAMRecord newRead = (SAMRecord)read.clone();
// byte reducedQual = (byte)(int)getReducedReadQualityTagValue(read);
// byte[] newQuals = new byte[read.getBaseQualities().length];
// Arrays.fill(newQuals, reducedQual);
// newRead.setBaseQualities(newQuals);
// return newRead;
// } catch ( CloneNotSupportedException e ) {
// throw new ReviewedStingException("SAMRecord no longer supports clone", e);
// }
}
// ----------------------------------------------------------------------------------------------------
//

View File

@ -50,6 +50,7 @@ public abstract class BaseTest {
public static final String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta";
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
public static final String validationDataLocation = GATKDataLocation + "Validation_Data/";
@ -99,10 +100,10 @@ public abstract class BaseTest {
logger.setLevel(Level.WARN);
// find our file sources
if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) {
logger.fatal("We can't locate the reference directories. Aborting!");
throw new RuntimeException("BaseTest setup failed: unable to locate the reference directories");
}
// if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) {
// logger.fatal("We can't locate the reference directories. Aborting!");
// throw new RuntimeException("BaseTest setup failed: unable to locate the reference directories");
// }
}
/**

View File

@ -18,7 +18,7 @@ public class BAQIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testPrintReadsNoBAQ() {
WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, Arrays.asList("902197bf77ed5a828d50e08771685928"));
WalkerTestSpec spec = new WalkerTestSpec( baseCommand +" -baq OFF", 1, Arrays.asList("d97340a2bba2c6320d1ebeb86024a27c"));
executeTest(String.format("testPrintReadsNoBAQ"), spec);
}

View File

@ -224,7 +224,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("0bece77ce6bc447438ef9b2921b2dc41"));
Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -252,7 +252,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("790b1a1d6ab79eee8c24812bb8ca6fae"));
Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -262,7 +262,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("408d3aba4d094c067fc00a43992c2292"));
Arrays.asList("118918f2e9e56a3cfc5ccb2856d529c8"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@ -272,7 +272,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("5e4e09354410b76fc0d822050d84132a"));
Arrays.asList("a20799237accd52c1b8c2ac096309c8f"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@ -282,7 +282,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("c599eedbeb422713b8a28529e805e4ae"));
Arrays.asList("18ef8181157b4ac3eb8492f538467f92"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("37d908a682ac269f8f19dec939ff5b01"));
Arrays.asList("ad884e511a751b05e64db5314314365a"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
}

View File

@ -5,6 +5,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@ -12,7 +13,7 @@ import org.testng.annotations.Test;
public class ReadUtilsUnitTest extends BaseTest {
SAMRecord read, reducedRead;
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
@ -47,13 +48,12 @@ public class ReadUtilsUnitTest extends BaseTest {
@Test
public void testReducedReads() {
Assert.assertFalse(ReadUtils.isReducedRead(read), "isReducedRead is false for normal read");
Assert.assertEquals(ReadUtils.getReducedReadQualityTagValue(read), null, "No reduced read tag in normal read");
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
Assert.assertTrue(ReadUtils.isReducedRead(reducedRead), "isReducedRead is true for reduced read");
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
for ( int i = 0; i < reducedRead.getReadLength(); i++) {
Assert.assertEquals(ReadUtils.getReducedQual(reducedRead, i), read.getBaseQualities()[i], "Reduced read quality not set to the expected value at " + i);
Assert.assertEquals(ReadUtils.getReducedCount(reducedRead, i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
}
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
@ -28,7 +29,7 @@ public class ReservoirDownsamplerUnitTest {
@Test
public void testOneElementWithPoolSizeOne() {
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
downsampler.addAll(reads);
@ -40,7 +41,7 @@ public class ReservoirDownsamplerUnitTest {
@Test
public void testOneElementWithPoolSizeGreaterThanOne() {
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);

View File

@ -0,0 +1,84 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import com.google.caliper.runner.CaliperMain;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import java.util.*;
/**
* Caliper microbenchmark of fragment pileup
*/
public class FragmentPileupBenchmark extends SimpleBenchmark {
List<ReadBackedPileup> pileups;
@Param({"0", "4", "30", "150", "1000"})
int pileupSize; // set automatically by framework
@Param({"200", "400"})
int insertSize; // set automatically by framework
@Override protected void setUp() {
final int nPileupsToGenerate = 100;
pileups = new ArrayList<ReadBackedPileup>(nPileupsToGenerate);
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
GenomeLocParser genomeLocParser;
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 50);
final int readLen = 100;
for ( int pileupN = 0; pileupN < nPileupsToGenerate; pileupN++ ) {
ReadBackedPileup rbp = ArtificialSAMUtils.createReadBackedPileup(header, loc, readLen, insertSize, pileupSize);
pileups.add(rbp);
}
}
private void run(int rep, FragmentPileup.FragmentMatchingAlgorithm algorithm) {
int nFrags = 0;
for ( int i = 0; i < rep; i++ ) {
for ( ReadBackedPileup rbp : pileups )
nFrags += new FragmentPileup(rbp, algorithm).getTwoReadPileup().size();
}
}
public void timeOriginal(int rep) {
run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL);
}
public void timeSkipNonOverlapping(int rep) {
run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping);
}
public static void main(String[] args) {
CaliperMain.main(FragmentPileupBenchmark.class, args);
}
}

View File

@ -0,0 +1,126 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Test routines for read-backed pileup.
*/
public class FragmentPileupUnitTest extends BaseTest {
private static SAMFileHeader header;
private class FragmentPileupTest extends TestDataProvider {
List<TestState> states = new ArrayList<TestState>();
private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative));
List<SAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
SAMRecord left = pair.get(0);
SAMRecord right = pair.get(1);
for ( int pos = leftStart; pos < rightStart + readLen; pos++) {
boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd();
boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd();
if ( posCoveredByLeft || posCoveredByRight ) {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
List<Integer> offsets = new ArrayList<Integer>();
if ( posCoveredByLeft ) {
reads.add(left);
offsets.add(pos - left.getAlignmentStart());
}
if ( posCoveredByRight ) {
reads.add(right);
offsets.add(pos - right.getAlignmentStart());
}
boolean shouldBeFragment = posCoveredByLeft && posCoveredByRight;
ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets);
TestState testState = new TestState(shouldBeFragment, pileup);
states.add(testState);
}
}
}
}
private class TestState {
boolean shouldBeFragment;
ReadBackedPileup pileup;
private TestState(final boolean shouldBeFragment, final ReadBackedPileup pileup) {
this.shouldBeFragment = shouldBeFragment;
this.pileup = pileup;
}
}
@DataProvider(name = "fragmentPileupTest")
public Object[][] createTests() {
for ( boolean leftIsFirst : Arrays.asList(true, false) ) {
for ( boolean leftIsNegative : Arrays.asList(true, false) ) {
// Overlapping pair
// ----> [first]
// <--- [second]
new FragmentPileupTest("overlapping-pair", 10, 1, 5, leftIsFirst, leftIsNegative);
// Non-overlapping pair
// ---->
// <----
new FragmentPileupTest("nonoverlapping-pair", 10, 1, 15, leftIsFirst, leftIsNegative);
}
}
return FragmentPileupTest.getTests(FragmentPileupTest.class);
}
@Test(enabled = true, dataProvider = "fragmentPileupTest")
public void testMe(FragmentPileupTest test) {
for ( TestState testState : test.states ) {
ReadBackedPileup rbp = testState.pileup;
FragmentPileup fp = new FragmentPileup(rbp);
Assert.assertEquals(fp.getTwoReadPileup().size(), testState.shouldBeFragment ? 1 : 0);
Assert.assertEquals(fp.getOneReadPileup().size(), testState.shouldBeFragment ? 0 : 1);
}
}
@BeforeTest
public void setup() {
header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
}
}