commit 65c614fb4b
Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
@@ -215,7 +215,7 @@
     <target name="git.describe">
         <exec executable="git" outputproperty="git.describe.output" resultproperty="git.describe.exit.value" failonerror="false">
-            <arg line="describe" />
+            <arg line="describe --long" />
         </exec>
         <condition property="git.describe.succeeded">
            <equals arg1="${git.describe.exit.value}" arg2="0" />
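`git describe --long` always prints the `<tag>-<count>-g<shortsha>` form, even when HEAD sits exactly on a tag, so downstream version parsing sees one uniform format. A sketch of parsing that output; the tag value and regex here are illustrative, not taken from this build file:

```java
// Illustrative only: parse "git describe --long" output such as "1.4-14-g65c614f".
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DescribeParser {
    // <tag>-<commits since tag>-g<abbreviated sha>
    private static final Pattern DESCRIBE = Pattern.compile("(.+)-(\\d+)-g([0-9a-f]+)");

    public static void main(String[] args) {
        Matcher m = DESCRIBE.matcher("1.4-14-g65c614f");
        if (m.matches()) {
            System.out.println("tag=" + m.group(1)
                    + " commitsSinceTag=" + m.group(2)
                    + " sha=" + m.group(3));
        }
    }
}
```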

ivy.xml
@@ -76,7 +76,7 @@
         <dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />

         <!-- snpEff annotator for pipelines -->
-        <dependency org="net.sf.snpeff" name="snpeff" rev="2.0.4rc3" />
+        <dependency org="net.sf.snpeff" name="snpeff" rev="2.0.5" />

         <!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
         <exclude org="javax.servlet" />
@@ -443,7 +443,7 @@ public class GenomeAnalysisEngine {
         if(!readsDataSource.hasIndex() && intervals != null && !argCollection.allowIntervalsWithUnindexedBAM)
             throw new UserException.CommandLineException("Cannot perform interval processing when reads are present but no index is available.");

-        if(walker instanceof LocusWalker) {
+        if(walker instanceof LocusWalker || walker instanceof ActiveRegionWalker) {
             if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
                 throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
             if(intervals == null)
@@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;

 import java.util.Collection;
@@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
 import org.broadinstitute.sting.gatk.io.OutputTracker;
+import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.SampleUtils;
@@ -55,7 +56,6 @@ public class LinearMicroScheduler extends MicroScheduler {

         traversalEngine.startTimersIfNecessary();
         if(shard.getShardType() == Shard.ShardType.LOCUS) {
-            LocusWalker lWalker = (LocusWalker)walker;
             WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
                     getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
             for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
@@ -77,6 +77,12 @@ public class LinearMicroScheduler extends MicroScheduler {
             done = walker.isDone();
         }

+        // Special function call to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
+        if( traversalEngine instanceof TraverseActiveRegions ) {
+            final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
+            accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
+        }
+
         Object result = accumulator.finishTraversal();

         printOnTraversalDone(result);
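The hunk above defers active-region work into a queue during traversal and drains it once the last shard has been processed. A minimal sketch of that drain-at-end pattern; the names are illustrative, not the GATK scheduler API:

```java
// Sketch, assuming a single-threaded scheduler: work whose inputs may still
// grow is queued rather than run, then flushed once after the last shard.
import java.util.ArrayDeque;
import java.util.Queue;

class DeferredTraversal {
    private final Queue<Runnable> workQueue = new ArrayDeque<Runnable>();

    // During traversal: queue units that are not yet safe to process.
    void defer(Runnable unit) {
        workQueue.add(unit);
    }

    // Called once by the scheduler at the end: nothing more can arrive,
    // so every queued unit is now safe to process.
    void endTraversal() {
        while (!workQueue.isEmpty()) {
            workQueue.remove().run();
        }
    }
}
```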
@@ -128,6 +128,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
             traversalEngine = new TraverseDuplicates();
         } else if (walker instanceof ReadPairWalker) {
             traversalEngine = new TraverseReadPairs();
+        } else if (walker instanceof ActiveRegionWalker) {
+            traversalEngine = new TraverseActiveRegions();
         } else {
             throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
         }
@@ -0,0 +1,213 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import net.sf.samtools.SAMRecord;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.WalkerManager;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.*;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
+import org.broadinstitute.sting.gatk.walkers.DataSource;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.LinkedList;
+import java.util.Queue;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 12/9/11
+ */
+
+public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,LocusShardDataProvider> {
+    /**
+     * our log, which we want to capture anything from this class
+     */
+    protected static Logger logger = Logger.getLogger(TraversalEngine.class);
+
+    private final Queue<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
+    private final LinkedHashSet<SAMRecord> myReads = new LinkedHashSet<SAMRecord>();
+
+    @Override
+    protected String getTraversalType() {
+        return "active regions";
+    }
+
+    @Override
+    public T traverse( final ActiveRegionWalker<M,T> walker,
+                       final LocusShardDataProvider dataProvider,
+                       T sum) {
+        logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
+
+        LocusView locusView = getLocusView( walker, dataProvider );
+
+        int minStart = Integer.MAX_VALUE;
+        final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
+
+        if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
+
+            final ArrayList<ActiveRegion> isActiveList = new ArrayList<ActiveRegion>();
+
+            //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
+            ReferenceOrderedView referenceOrderedDataView = null;
+            if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
+                referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
+            else
+                referenceOrderedDataView = (RodLocusView)locusView;
+
+            // We keep processing while the next reference location is within the interval
+            while( locusView.hasNext() ) {
+                final AlignmentContext locus = locusView.next();
+                GenomeLoc location = locus.getLocation();
+
+                dataProvider.getShard().getReadMetrics().incrementNumIterations();
+
+                if ( locus.hasExtendedEventPileup() ) {
+                    // if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
+                    // associated with the current site), we need to update the location. The updated location still starts
+                    // at the current genomic position, but it has to span the length of the longest deletion (if any).
+                    location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
+
+                    // it is possible that the new expanded location spans the current shard boundary; the next method ensures
+                    // that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
+                    // the view has all the bases we are going to need. If the location fits within the current view bounds,
+                    // the next call will not do anything to the view:
+                    referenceView.expandBoundsToAccomodateLoc(location);
+                }
+
+                // create reference context. Note that if we have a pileup of "extended events", the context will
+                // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+                final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+                // Iterate forward to get all reference ordered data covering this location
+                final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
+
+                // Call the walker's isActive function for this locus and add the result to the list to be integrated later
+                final boolean isActive = walker.isActive( tracker, refContext, locus );
+                isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser()) );
+
+                // Grab all the previously unseen reads from this pileup and add them to the massive read list
+                for( final PileupElement p : locus.getBasePileup() ) {
+                    final SAMRecord read = p.getRead();
+                    if( !myReads.contains(read) ) {
+                        myReads.add(read);
+                    }
+                }
+
+                // If this is the last pileup for this shard then we need to calculate the minimum alignment start so that
+                // we know which active regions in the work queue are now safe to process
+                if( !locusView.hasNext() ) {
+                    for( final PileupElement p : locus.getBasePileup() ) {
+                        final SAMRecord read = p.getRead();
+                        if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
+                    }
+                }
+                printProgress(dataProvider.getShard(),locus.getLocation());
+            }
+
+            // Take the individual isActive calls and integrate them into contiguous active regions and
+            // add these blocks of work to the work queue
+            final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
+            logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
+            workQueue.addAll( activeRegions );
+        }
+
+        while( workQueue.peek().getLocation().getStop() < minStart ) {
+            final ActiveRegion activeRegion = workQueue.remove();
+            sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
+        }
+
+        return sum;
+    }
+
+    // Special function called in LinearMicroScheduler to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
+    public T endTraversal( final Walker<M,T> walker, T sum) {
+        while( workQueue.peek() != null ) {
+            final ActiveRegion activeRegion = workQueue.remove();
+            sum = processActiveRegion( activeRegion, myReads, workQueue, sum, (ActiveRegionWalker<M,T>) walker );
+        }
+
+        return sum;
+    }
+
+    private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<SAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
+        final ArrayList<SAMRecord> placedReads = new ArrayList<SAMRecord>();
+        for( final SAMRecord read : reads ) {
+            final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
+            if( activeRegion.getLocation().overlapsP( readLoc ) ) {
+                // The region with the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
+                long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
+                ActiveRegion bestRegion = activeRegion;
+                for( final ActiveRegion otherRegionToTest : workQueue ) {
+                    if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
+                        maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc);
+                        bestRegion = otherRegionToTest;
+                    }
+                }
+                bestRegion.add( (GATKSAMRecord) read, true );
+
+                // The read is also added to all other regions in which it overlaps but marked as non-primary
+                if( !bestRegion.equals(activeRegion) ) {
+                    activeRegion.add( (GATKSAMRecord) read, false );
+                }
+                for( final ActiveRegion otherRegionToTest : workQueue ) {
+                    if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
+                        activeRegion.add( (GATKSAMRecord) read, false );
+                    }
+                }
+                placedReads.add( read );
+            }
+        }
+        reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
+
+        logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLocation());
+        final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker
+        return walker.reduce( x, sum );
+    }
+
+    /**
+     * Gets the best view of loci for this walker given the available data.
+     * @param walker walker to interrogate.
+     * @param dataProvider Data with which to drive the locus view.
+     * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
+     */
+    private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
+        DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
+        if( dataSource == DataSource.READS )
+            return new CoveredLocusView(dataProvider);
+        else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
+            return new AllLocusView(dataProvider);
+        else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
+            return new RodLocusView(dataProvider);
+        else
+            throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
+    }
+
+    // integrate active regions into contiguous chunks based on active status
+    private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<ActiveRegion> activeList ) {
+        final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
+        ActiveRegion prevLocus = activeList.remove(0);
+        ActiveRegion startLocus = prevLocus;
+        for( final ActiveRegion thisLocus : activeList ) {
+            if( prevLocus.isActive != thisLocus.isActive ) {
+                returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
+                        prevLocus.isActive, engine.getGenomeLocParser() ) );
+                startLocus = thisLocus;
+            }
+            prevLocus = thisLocus;
+        }
+        // output the last region if necessary
+        if( startLocus != prevLocus ) {
+            returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
+                    prevLocus.isActive, engine.getGenomeLocParser() ) );
+        }
+        return returnList;
+    }
+}
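`integrateActiveList` in the new file collapses the per-locus isActive calls into contiguous same-state regions, which is a run-length grouping. A standalone sketch of the same idea on plain booleans; the types are illustrative, and unlike the method above this simplified version always emits the final run:

```java
// Run-length grouping of a per-position activity profile into [start, end, state] runs.
import java.util.ArrayList;
import java.util.List;

public class RunLengthDemo {
    static List<int[]> integrate(boolean[] isActive) {
        List<int[]> runs = new ArrayList<int[]>();
        int start = 0;
        for (int i = 1; i <= isActive.length; i++) {
            // close the current run at the end of input or on a state flip
            if (i == isActive.length || isActive[i] != isActive[start]) {
                runs.add(new int[]{start, i - 1, isActive[start] ? 1 : 0});
                start = i;
            }
        }
        return runs;
    }

    public static void main(String[] args) {
        // F F T T T F -> [0,1] inactive, [2,4] active, [5,5] inactive
        for (int[] r : integrate(new boolean[]{false, false, true, true, true, false})) {
            System.out.println(r[0] + ".." + r[1] + (r[2] == 1 ? " active" : " inactive"));
        }
    }
}
```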
@@ -0,0 +1,29 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 12/7/11
+ */
+
+@By(DataSource.READS)
+@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
+@PartitionBy(PartitionType.READ)
+public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
+    // Do we actually want to operate on the context?
+    public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
+        return true; // We are keeping all the reads
+    }
+
+    // Determine active status over the AlignmentContext
+    public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
+
+    // Map over the ActiveRegion
+    public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
+}
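To make the new contract concrete, here is a hypothetical walker built on this class; it is not part of the commit. The mismatch heuristic, the reduce logic, and the `reduceInit`/`reduce` signatures (assumed to come from the `Walker` base class) are all invented for illustration:

```java
// Hypothetical example walker: flags a locus active when any read base
// disagrees with the reference base at that position.
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.pileup.PileupElement;

public class ExampleActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
    @Override
    public boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
        for (final PileupElement p : context.getBasePileup()) {
            if (p.getBase() != ref.getBase())
                return true; // mismatch seen: mark this locus active
        }
        return false;
    }

    @Override
    public Integer map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker) {
        return activeRegion.getReads().size(); // per-region work goes here
    }

    // Assumed base-class hooks, shown for completeness of the sketch:
    public Integer reduceInit() {
        return 0;
    }

    public Integer reduce(final Integer value, final Integer sum) {
        return value + sum;
    }
}
```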
@@ -56,7 +56,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotation

     // We refuse to parse SnpEff output files generated by unsupported versions, or
     // lacking a SnpEff version number in the VCF header:
-    public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.4" };
+    public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.5" };
     public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
     public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
@@ -204,11 +204,6 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotation
     }

     public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
-        throw new UserException("SnpEff support is currently disabled in the GATK until SnpEff 2.0.4 is officially released " +
-                "due to a serious issue with SnpEff versions prior to 2.0.4. Please see this page for more details: " +
-                "http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator");
-
-        /*
         // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
         // without providing a SnpEff rod via --snpEffFile):
         validateRodBinding(walker.getSnpEffRodBinding());
@@ -228,7 +223,6 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotation
         // mistaken in the future for a SnpEff output file:
         headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
         headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
-        */
     }

     public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
@@ -275,19 +275,22 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
     public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
         byte obsBase = elt.getBase();
         byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
+        if ( qual == 0 )
+            return 0;
+
         if ( elt.isReducedRead() ) {
             // reduced read representation
             if ( BaseUtils.isRegularBase( obsBase )) {
-                add(obsBase, qual, (byte)0, (byte)0, elt.getRepresentativeCount()); // fast calculation of n identical likelihoods
-                return elt.getRepresentativeCount(); // we added nObs bases here
+                int representativeCount = elt.getRepresentativeCount();
+                add(obsBase, qual, (byte)0, (byte)0, representativeCount); // fast calculation of n identical likelihoods
+                return representativeCount; // we added nObs bases here
             }

             // odd bases or deletions => don't use them
             return 0;
         }

-        return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
+        return add(obsBase, qual, (byte)0, (byte)0, 1);
     }

     public int add(List<PileupElement> overlappingPair, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
@@ -519,7 +522,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
         if ( qual > SAMUtils.MAX_PHRED_SCORE )
             throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
         if ( capBaseQualsAtMappingQual )
-            qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());
+            qual = (byte)Math.min((int)qual, p.getMappingQual());
         if ( (int)qual < minBaseQual )
             qual = (byte)0;
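The one-line fix in this hunk matters because `qual` may already have been adjusted earlier in `qualToUse` (presumably by a quality-rewriting step such as BAQ) before the mapping-quality cap is applied; re-reading `p.getQual()` would silently discard that adjustment. A toy illustration with made-up numbers:

```java
// Illustrative numbers only: why capping must use the working value, not the raw one.
public class QualCapDemo {
    public static void main(String[] args) {
        byte rawQual = 35;        // base quality as stored in the read
        byte workingQual = 20;    // e.g. already reduced by an upstream adjustment
        int mappingQual = 30;

        byte buggy = (byte) Math.min((int) rawQual, mappingQual);     // 30: discards the adjustment
        byte fixed = (byte) Math.min((int) workingQual, mappingQual); // 20: preserves it

        System.out.println("buggy=" + buggy + " fixed=" + fixed);
    }
}
```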
@@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -73,7 +74,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
     }

     /**
-     * Must be overridden by concrete subclasses
+     * Can be overridden by concrete subclasses
      *
      * @param tracker rod data
      * @param ref reference context
@@ -82,6 +83,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
      * @param priors priors to use for GLs
      * @param alternateAlleleToUse the alternate allele to use, null if not set
      * @param useBAQedPileup should we use the BAQed pileup or the raw one?
+     * @param locParser Genome Loc Parser
      * @return variant context where genotypes are no-called but with GLs
      */
     public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
@@ -90,7 +92,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
                                                   AlignmentContextUtils.ReadOrientation contextType,
                                                   GenotypePriors priors,
                                                   Allele alternateAlleleToUse,
-                                                  boolean useBAQedPileup);
+                                                  boolean useBAQedPileup,
+                                                  GenomeLocParser locParser);

     protected int getFilteredDepth(ReadBackedPileup pileup) {
         int count = 0;
@@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@@ -54,9 +55,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
     private final boolean getAlleleListFromVCF;

     private boolean DEBUG = false;
+    private final boolean doMultiAllelicCalls;
     private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
+    private final int maxAlternateAlleles;
     private PairHMMIndelErrorModel pairModel;

     private static ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>> indelLikelihoodMap =
@@ -81,12 +82,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
     protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
         super(UAC, logger);
         pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
-                UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.BANDED_INDEL_COMPUTATION);
+                UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
         alleleList = new ArrayList<Allele>();
         getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
         minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
         HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
         DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
+        maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES;
+        doMultiAllelicCalls = UAC.MULTI_ALLELIC;

         haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
         ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
@@ -95,7 +98,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {

     private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
                                                       Map<String, AlignmentContext> contexts,
-                                                      AlignmentContextUtils.ReadOrientation contextType) {
+                                                      AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
         Allele refAllele=null, altAllele=null;
         GenomeLoc loc = ref.getLocus();
         ArrayList<Allele> aList = new ArrayList<Allele>();
@@ -208,45 +211,24 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
                 }
             }

-            /* if (DEBUG) {
-                int icount = indelPileup.getNumberOfInsertions();
-                int dcount = indelPileup.getNumberOfDeletions();
-                if (icount + dcount > 0)
-                {
-                    List<Pair<String,Integer>> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases());
-                    System.out.format("#ins: %d, #del:%d\n", insCount, delCount);
-
-                    for (int i=0 ; i < eventStrings.size() ; i++ ) {
-                        System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second);
-                        // int k=0;
-                    }
-                    System.out.println();
-                }
-            } */
         }

+        Collection<VariantContext> vcs = new ArrayList<VariantContext>();
         int maxAlleleCnt = 0;
         String bestAltAllele = "";

         for (String s : consensusIndelStrings.keySet()) {
-            int curCnt = consensusIndelStrings.get(s);
-            if (curCnt > maxAlleleCnt) {
-                maxAlleleCnt = curCnt;
-                bestAltAllele = s;
-            }
-            // if (DEBUG)
-            //     System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
-        } //gdebug-
-
-        if (maxAlleleCnt < minIndelCountForGenotyping)
-            return aList;
-
-        if (bestAltAllele.startsWith("D")) {
+            int curCnt = consensusIndelStrings.get(s), stop = 0;
+            // if the observed count is above the minimum threshold, we will genotype this allele
+            if (curCnt < minIndelCountForGenotyping)
+                continue;
+
+            if (s.startsWith("D")) {
                 // get deletion length
-                int dLen = Integer.valueOf(bestAltAllele.substring(1));
+                int dLen = Integer.valueOf(s.substring(1));
                 // get ref bases of accurate deletion
                 int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
-                //System.out.println(new String(ref.getBases()));
+                stop = loc.getStart() + dLen;
                 byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);

                 if (Allele.acceptableAlleleBases(refBases)) {
@@ -256,15 +238,42 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
             }
             else {
                 // insertion case
-                if (Allele.acceptableAlleleBases(bestAltAllele)) {
+                if (Allele.acceptableAlleleBases(s)) {
                     refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
-                    altAllele = Allele.create(bestAltAllele, false);
+                    altAllele = Allele.create(s, false);
+                    stop = loc.getStart();
                 }
             }
-            if (refAllele != null && altAllele != null) {
-                aList.add(0,refAllele);
-                aList.add(1,altAllele);
+            ArrayList vcAlleles = new ArrayList<Allele>();
+            vcAlleles.add(refAllele);
+            vcAlleles.add(altAllele);
+
+            final VariantContextBuilder builder = new VariantContextBuilder().source("");
+            builder.loc(loc.getContig(), loc.getStart(), stop);
+            builder.alleles(vcAlleles);
+            builder.referenceBaseForIndel(ref.getBase());
+            builder.noGenotypes();
+            if (doMultiAllelicCalls)
+                vcs.add(builder.make());
+            else {
+                if (curCnt > maxAlleleCnt) {
+                    maxAlleleCnt = curCnt;
+                    vcs.clear();
+                    vcs.add(builder.make());
+                }
             }
+
+        }
+
+        if (vcs.isEmpty())
+            return aList; // nothing else to do, no alleles passed minimum count criterion
+
+        VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
+
+        aList = new ArrayList<Allele>(mergedVC.getAlleles());

         return aList;
     }
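The rewritten loop above changes allele selection from "track the single best-counted allele, then test it" to "build a VariantContext per allele that passes the count threshold, then either keep them all (multi-allelic mode) or only the best-supported one". A reduced sketch of just that selection logic, with illustrative allele strings, counts, and threshold:

```java
// Counts-only sketch of the two selection modes introduced above.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ConsensusSelectionDemo {
    static List<String> select(Map<String, Integer> countsByAllele, int minCount, boolean multiAllelic) {
        List<String> kept = new ArrayList<String>();
        int maxCount = 0;
        for (Map.Entry<String, Integer> e : countsByAllele.entrySet()) {
            if (e.getValue() < minCount)
                continue; // below the genotyping threshold: skip, as in the new loop
            if (multiAllelic) {
                kept.add(e.getKey()); // keep every allele that passes
            } else if (e.getValue() > maxCount) {
                maxCount = e.getValue();
                kept.clear(); // single-allele mode: keep only the best-supported one
                kept.add(e.getKey());
            }
        }
        return kept;
    }

    public static void main(String[] args) {
        Map<String, Integer> counts = new LinkedHashMap<String, Integer>();
        counts.put("D2", 7);   // deletion of length 2 seen 7 times
        counts.put("ITT", 5);  // insertion of TT seen 5 times
        counts.put("D1", 2);   // below threshold
        System.out.println(select(counts, 5, true));  // [D2, ITT]
        System.out.println(select(counts, 5, false)); // [D2]
    }
}
```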
@@ -277,7 +286,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
                                           AlignmentContextUtils.ReadOrientation contextType,
                                           GenotypePriors priors,
                                           Allele alternateAlleleToUse,
-                                          boolean useBAQedPileup) {
+                                          boolean useBAQedPileup, GenomeLocParser locParser) {

         if ( tracker == null )
             return null;
@@ -323,7 +332,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {

         }
         else {
-            alleleList = computeConsensusAlleles(ref,contexts, contextType);
+            alleleList = computeConsensusAlleles(ref,contexts, contextType, locParser);
             if (alleleList.isEmpty())
                 return null;
         }
@@ -30,10 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.exceptions.StingException;
@@ -46,8 +43,6 @@ import java.util.*;

 public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {

-    private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
-
     private boolean ALLOW_MULTIPLE_ALLELES;

     private final boolean useAlleleFromVCF;
@@ -56,15 +51,20 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
         super(UAC, logger);
         ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
         useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
+
+        // make sure the PL cache has been initialized with enough alleles
+        if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null || UnifiedGenotyperEngine.PLIndexToAlleleIndex.length < 4 ) // +1 for 0 alt alleles
+            UnifiedGenotyperEngine.calculatePLcache(3);
     }

-    public VariantContext getLikelihoods(RefMetaDataTracker tracker,
-                                         ReferenceContext ref,
-                                         Map<String, AlignmentContext> contexts,
-                                         AlignmentContextUtils.ReadOrientation contextType,
-                                         GenotypePriors priors,
-                                         Allele alternateAlleleToUse,
-                                         boolean useBAQedPileup) {
+    public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
+                                         final ReferenceContext ref,
+                                         final Map<String, AlignmentContext> contexts,
+                                         final AlignmentContextUtils.ReadOrientation contextType,
+                                         final GenotypePriors priors,
+                                         final Allele alternateAlleleToUse,
+                                         final boolean useBAQedPileup,
+                                         final GenomeLocParser locParser) {

         if ( !(priors instanceof DiploidSNPGenotypePriors) )
             throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
@@ -79,6 +79,20 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
         alleles.add(Allele.create(refBase, true));
         final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);

+        // calculate the GLs
+        ArrayList<SampleGenotypeData> GLs = new ArrayList<SampleGenotypeData>(contexts.size());
+        for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
+            ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
+            if ( useBAQedPileup )
+                pileup = createBAQedPileup( pileup );
+
+            // create the GenotypeLikelihoods object
+            final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
+            final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
+            if ( nGoodBases > 0 )
+                GLs.add(new SampleGenotypeData(sample.getKey(), GL, getFilteredDepth(pileup)));
+        }
+
         // find the alternate allele(s) that we should be using
         if ( alternateAlleleToUse != null ) {
             basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
@@ -93,7 +107,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
             basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
         } else {

-            determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
+            determineAlternateAlleles(basesToUse, refBase, GLs);

             // how many alternate alleles are we using?
             int alleleCounter = Utils.countSetBits(basesToUse);
@@ -125,22 +139,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
         builder.alleles(alleles);

         // create the genotypes; no-call everyone for now
-        GenotypesContext genotypes = GenotypesContext.create();
+        final GenotypesContext genotypes = GenotypesContext.create();
         final List<Allele> noCall = new ArrayList<Allele>();
         noCall.add(Allele.NO_CALL);

-        for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
-            ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
-            if ( useBAQedPileup )
-                pileup = createBAQedPileup( pileup );
-
-            // create the GenotypeLikelihoods object
-            final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
-            final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
-            if ( nGoodBases == 0 )
-                continue;
-
-            final double[] allLikelihoods = GL.getLikelihoods();
+        for ( SampleGenotypeData sampleData : GLs ) {
+            final double[] allLikelihoods = sampleData.GL.getLikelihoods();
             final double[] myLikelihoods = new double[numLikelihoods];

             int myLikelihoodsIndex = 0;
@@ -151,60 +155,48 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
             }

             // normalize in log space so that max element is zero.
-            GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
+            final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));

-            HashMap<String, Object> attributes = new HashMap<String, Object>();
-            attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
+            final HashMap<String, Object> attributes = new HashMap<String, Object>();
+            attributes.put(VCFConstants.DEPTH_KEY, sampleData.depth);
             attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
-            genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
+            genotypes.add(new Genotype(sampleData.name, noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
         }

         return builder.genotypes(genotypes).make();
     }

     // fills in the allelesToUse array
-    protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
-        int[] qualCounts = new int[4];
-
-        for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
-            // calculate the sum of quality scores for each base
-            ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
-            for ( PileupElement p : pileup ) {
-                // ignore deletions
-                if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
-                    continue;
-
-                final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
-                if ( index >= 0 ) {
-                    qualCounts[index] += p.getQual();
-                }
+    protected void determineAlternateAlleles(final boolean[] allelesToUse, final byte ref, final List<SampleGenotypeData> sampleDataList) {
+
+        final int baseIndexOfRef = BaseUtils.simpleBaseToBaseIndex(ref);
+        final int PLindexOfRef = DiploidGenotype.createDiploidGenotype(ref, ref).ordinal();
+        final double[] likelihoodCounts = new double[4];
+
+        // based on the GLs, find the alternate alleles with the most probability
+        for ( SampleGenotypeData sampleData : sampleDataList ) {
+            final double[] likelihoods = sampleData.GL.getLikelihoods();
+            final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
+            if ( PLindexOfBestGL != PLindexOfRef ) {
+                int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[3][PLindexOfBestGL];
+                if ( alleles[0] != baseIndexOfRef )
+                    likelihoodCounts[alleles[0]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
+                // don't double-count it
+                if ( alleles[1] != baseIndexOfRef && alleles[1] != alleles[0] )
+                    likelihoodCounts[alleles[1]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
             }
         }

         if ( ALLOW_MULTIPLE_ALLELES ) {
-            for ( byte altAllele : BaseUtils.BASES ) {
-                if ( altAllele == ref )
-                    continue;
-                int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
-                if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
-                    allelesToUse[index] = true;
+            for ( int i = 0; i < 4; i++ ) {
+                if ( likelihoodCounts[i] > 0.0 ) {
+                    allelesToUse[i] = true;
                 }
             }
         } else {
-            // set the non-ref base which has the maximum quality score sum
-            int maxCount = 0;
-            int indexOfMax = 0;
-            for ( byte altAllele : BaseUtils.BASES ) {
-                if ( altAllele == ref )
-                    continue;
-                int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
-                if ( qualCounts[index] > maxCount ) {
-                    maxCount = qualCounts[index];
-                    indexOfMax = index;
-                }
-            }
-
-            if ( maxCount > 0 )
+            // set the non-ref base which has the maximum sum of non-ref GLs
+            final int indexOfMax = MathUtils.maxElementIndex(likelihoodCounts);
+            if ( likelihoodCounts[indexOfMax] > 0.0 )
                 allelesToUse[indexOfMax] = true;
         }
     }
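`determineAlternateAlleles` now discovers alternate alleles from the genotype likelihoods instead of summed base qualities: each sample's most likely genotype votes for its non-reference alleles, weighted by how much that genotype's log10 likelihood beats hom-ref. A reduced sketch with made-up numbers and a hard-coded two-allele index space (the real code indexes a PL cache over all four bases):

```java
// Illustrative: one sample, ref allele A (index 0), alt C (index 1),
// genotypes AA, AC, CC with log10 likelihoods.
public class AltAlleleVoteDemo {
    public static void main(String[] args) {
        double[] likelihoods = { -9.1, -4.0, -0.5 };       // AA, AC, CC
        int[][] genotypeToAlleles = { {0, 0}, {0, 1}, {1, 1} };
        int refIndex = 0, homRefGenotype = 0;

        // find the most likely genotype
        int best = 0;
        for (int g = 1; g < likelihoods.length; g++)
            if (likelihoods[g] > likelihoods[best]) best = g;

        double[] votes = new double[2];
        if (best != homRefGenotype) {
            double margin = likelihoods[best] - likelihoods[homRefGenotype]; // 8.6
            int[] alleles = genotypeToAlleles[best];
            if (alleles[0] != refIndex) votes[alleles[0]] += margin;
            if (alleles[1] != refIndex && alleles[1] != alleles[0])
                votes[alleles[1]] += margin; // avoid double-counting hom-alt genotypes
        }
        System.out.println("vote for C = " + votes[1]); // 8.6: C is picked as the alt allele
    }
}
```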
@@ -227,4 +219,16 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
         public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); }
     }

+    private static class SampleGenotypeData {
+
+        public final String name;
+        public final DiploidSNPGenotypeLikelihoods GL;
+        public final int depth;
+
+        public SampleGenotypeData(final String name, final DiploidSNPGenotypeLikelihoods GL, final int depth) {
+            this.name = name;
+            this.GL = GL;
+            this.depth = depth;
+        }
+    }
 }
@@ -109,7 +109,7 @@ public class UnifiedArgumentCollection {
      * For advanced users only.
      */
     @Advanced
-    @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles (SNPs only)", required = false)
+    @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles", required = false)
     public boolean MULTI_ALLELIC = false;

     /**
@@ -146,8 +146,8 @@ public class UnifiedArgumentCollection {
     public int INDEL_HAPLOTYPE_SIZE = 80;

     @Hidden
-    @Argument(fullName = "bandedIndel", shortName = "bandedIndel", doc = "Banded Indel likelihood computation", required = false)
-    public boolean BANDED_INDEL_COMPUTATION = false;
+    @Argument(fullName = "noBandedIndel", shortName = "noBandedIndel", doc = "Don't do Banded Indel likelihood computation", required = false)
+    public boolean DONT_DO_BANDED_INDEL_COMPUTATION = false;

     @Hidden
     @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
@@ -184,7 +184,7 @@ public class UnifiedArgumentCollection {

         // todo- arguments to remove
         uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
-        uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
+        uac.DONT_DO_BANDED_INDEL_COMPUTATION = DONT_DO_BANDED_INDEL_COMPUTATION;
         uac.MULTI_ALLELIC = MULTI_ALLELIC;
         return uac;
     }
@@ -243,7 +243,7 @@ public class UnifiedGenotyperEngine {
             glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
         }

-        return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
+        return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
     }

     private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@@ -858,4 +858,171 @@ public class UnifiedGenotyperEngine {

 return calls;
 }

+    /**
+     * @param vc variant context with genotype likelihoods
+     * @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
+     * @param exactAC integer array describing the AC from the exact model for the corresponding alleles
+     * @return genotypes
+     */
+    public static GenotypesContext constrainedAssignGenotypes(VariantContext vc, boolean[] allelesToUse, int[] exactAC) {
+
+        final GenotypesContext GLs = vc.getGenotypes();
+
+        // samples
+        final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
+
+        // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
+        final int numOriginalAltAlleles = allelesToUse.length;
+        final List<Allele> newAlleles = new ArrayList<Allele>(numOriginalAltAlleles+1);
+        newAlleles.add(vc.getReference());
+        final HashMap<Allele,Integer> alleleIndexMap = new HashMap<Allele,Integer>(); // need this for skipping dimensions
+        int[] alleleCount = new int[exactAC.length];
+        for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
+            if ( allelesToUse[i] ) {
+                newAlleles.add(vc.getAlternateAllele(i));
+                alleleIndexMap.put(vc.getAlternateAllele(i),i);
+                alleleCount[i] = exactAC[i];
+            } else {
+                alleleCount[i] = 0;
+            }
+        }
+        final List<Allele> newAltAlleles = newAlleles.subList(1,newAlleles.size());
+        final int numNewAltAlleles = newAltAlleles.size();
+        ArrayList<Integer> likelihoodIndexesToUse = null;
+
+        // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
+        // then we can keep the PLs as is; otherwise, we determine which ones to keep
+        final int[][] PLcache;
+        if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
+            likelihoodIndexesToUse = new ArrayList<Integer>(30);
+            PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
+
+            for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
+                int[] alleles = PLcache[PLindex];
+                // consider this entry only if both of the alleles are good
+                if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) )
+                    likelihoodIndexesToUse.add(PLindex);
+            }
+        } else {
+            PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
+        }
+
+        // set up the trellis dimensions
+        // SAMPLE x alt 1 x alt 2 x alt 3
+        // todo -- check that exactAC has alt counts at [1],[2],[3] (and not [0],[1],[2])
+        double[][][][] transitionTrellis = new double[sampleIndices.size()+1][exactAC[1]][exactAC[2]][exactAC[3]];
+        // N x AC1 x AC2 x AC3; worst performance in multi-allelic where all alleles are moderate frequency,
+        // capped at the MLE ACs
+        // todo -- there's an optimization: not all states in the rectangular matrix will be reached, in fact
+        // todo -- for tT[0] we only care about tT[0][0][0][0], and for tT[1], only combinations of 0,1,2.
+        int idx = 1; // index of which sample we're on
+        int prevMaxState = 0; // the maximum state (e.g. AC) reached by the previous sample. Symmetric. (AC capping handled by logic in loop)
+        // iterate over each sample
+        for ( String sample : sampleIndices ) {
+            // push the likelihoods into the next possible states, that is to say
+            // L[state] = L[prev state] + L[genotype getting into state]
+            // iterate over each previous state, by dimension,
+            // and contribute the likelihoods for transitions to this state
+            double[][][] prevState = transitionTrellis[idx-1];
+            double[][][] thisState = transitionTrellis[idx];
+            Genotype genotype = GLs.get(sample);
+            if ( genotype.isNoCall() || genotype.isFiltered() ) {
+                thisState = prevState.clone();
+            } else {
+                double[] likelihoods = genotype.getLikelihoods().getAsVector();
+                int dim1min = Math.max(0, alleleCount[0]-2*(sampleIndices.size()-idx+1));
+                int dim1max = Math.min(prevMaxState,alleleCount[0]);
+                int dim2min = Math.max(0,alleleCount[1]-2*(sampleIndices.size()-idx+1));
+                int dim2max = Math.min(prevMaxState,alleleCount[1]);
+                int dim3min = Math.max(0,alleleCount[2]-2*(sampleIndices.size()-idx+1));
+                int dim3max = Math.min(prevMaxState,alleleCount[2]);
+                // cue annoying nested for loop
+                for ( int a1 = dim1min ; a1 <= dim1max; a1++ ) {
+                    for ( int a2 = dim2min; a2 <= dim2max; a2++ ) {
+                        for ( int a3 = dim3min; a3 <= dim3max; a3++ ) {
+                            double base = prevState[a1][a2][a3];
+                            for ( int likIdx : likelihoodIndexesToUse ) {
+                                int[] offsets = calculateOffsets(PLcache[likIdx]);
+                                thisState[a1+offsets[1]][a2+offsets[2]][a3+offsets[3]] = base + likelihoods[likIdx];
+                            }
+                        }
+                    }
+                }
+                prevMaxState += 2;
+            }
+            idx++;
+        }
+
+        // after all that pain, we have a fully calculated trellis. Now just march backwards from the EAC state and
+        // assign genotypes along the greedy path
+
+        GenotypesContext calls = GenotypesContext.create(sampleIndices.size());
+        int[] state = alleleCount;
+        for ( String sample : Utils.reverse(sampleIndices) ) {
+            --idx;
+            // the next state will be the maximum achievable state
+            Genotype g = GLs.get(sample);
+            if ( g.isNoCall() || ! g.hasLikelihoods() ) {
+                calls.add(g);
+                continue;
+            }
+
+            // subset to the new likelihoods. These are not used except for subsetting in the context itself,
+            // i.e. they are not a part of the calculation.
+            final double[] originalLikelihoods = GLs.get(sample).getLikelihoods().getAsVector();
+            double[] newLikelihoods;
+            if ( likelihoodIndexesToUse == null ) {
+                newLikelihoods = originalLikelihoods;
+            } else {
+                newLikelihoods = new double[likelihoodIndexesToUse.size()];
+                int newIndex = 0;
+                for ( int oldIndex : likelihoodIndexesToUse )
+                    newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
+
+                // might need to re-normalize
+                newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
+            }
+
+            // todo -- alter this. For ease of programming, likelihood indices are
+            // todo -- used to iterate over achievable states.
+            double max = Double.NEGATIVE_INFINITY;
+            int[] bestState = null;
+            int[] bestAlleles = null;
+            int bestLikIdx = -1;
+            for ( int likIdx : likelihoodIndexesToUse ) {
+                int[] offsets = calculateOffsets(PLcache[likIdx]);
+                double val = transitionTrellis[idx-1][state[0]-offsets[0]][state[1]-offsets[1]][state[2]-offsets[2]];
+                if ( val > max ) {
+                    max = val;
+                    bestState = new int[] { state[0]-offsets[0], state[1]-offsets[1], state[2]-offsets[2] };
+                    bestAlleles = PLcache[likIdx];
+                    bestLikIdx = likIdx;
+                }
+            }
+            state = bestState;
+            List<Allele> gtAlleles = new ArrayList<Allele>(2);
+            gtAlleles.add(newAlleles.get(bestAlleles[0]));
+            gtAlleles.add(newAlleles.get(bestAlleles[1]));
+
+            final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(bestLikIdx, newLikelihoods);
+            Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
+            if ( numNewAltAlleles == 0 )
+                attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
+            else
+                attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
+            calls.add(new Genotype(sample, gtAlleles, qual, null, attrs, false));
+
+        }
+        return calls;
+    }
+
+    private static int[] calculateOffsets(int[] alleleIndeces) {
+        int[] offsets = new int[4];
+        for ( int i = 0; i < alleleIndeces.length; i++ ) {
+            offsets[alleleIndeces[i]]++;
+        }
+
+        return offsets;
+    }
 }
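The trellis above keys its transitions on diploid PL indices, so the mapping from a PL index to an unordered allele pair is what calculateOffsets consumes. The following standalone sketch is illustrative only: it re-implements the pair enumeration rather than using the PLIndexToAlleleIndex cache from the patch, but follows the same VCF genotype ordering.

// Hypothetical standalone illustration of the PL-index -> allele-pair bookkeeping.
// For N alternate alleles there are (N+1)(N+2)/2 diploid genotypes, ordered
// (0,0), (0,1), (1,1), (0,2), (1,2), (2,2), ... (0 = the reference allele).
public class PLIndexDemo {
    // enumerate the allele pair for every PL index, mirroring the VCF PL ordering
    static int[][] buildPLIndexToAllelePair(final int numAltAlleles) {
        final int numGenotypes = (numAltAlleles + 1) * (numAltAlleles + 2) / 2;
        final int[][] pairs = new int[numGenotypes][];
        int index = 0;
        for (int a2 = 0; a2 <= numAltAlleles; a2++)
            for (int a1 = 0; a1 <= a2; a1++)
                pairs[index++] = new int[]{a1, a2};
        return pairs;
    }

    // same role as calculateOffsets above: count how many copies of each allele
    // a genotype contributes to the running allele-count state
    static int[] offsets(final int[] allelePair, final int numAltAlleles) {
        final int[] counts = new int[numAltAlleles + 1];
        counts[allelePair[0]]++;
        counts[allelePair[1]]++;
        return counts;
    }

    public static void main(String[] args) {
        for (int[] pair : buildPLIndexToAllelePair(2))
            System.out.printf("PL pair (%d,%d) -> offsets %s%n",
                    pair[0], pair[1], java.util.Arrays.toString(offsets(pair, 2)));
    }
}

For two alternate alleles this prints the six genotype pairs (0,0), (0,1), (1,1), (0,2), (1,2), (2,2) together with the per-allele count offsets the trellis adds when a sample takes that genotype.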
@@ -28,13 +28,13 @@ package org.broadinstitute.sting.gatk.walkers.indels;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
-import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
@@ -409,9 +409,9 @@ public class PairHMMIndelErrorModel {
 }
 }
 else {
-//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
-SAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
-if (read == null)
+// System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
+GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+if (read.isEmpty())
 continue;

 if(ReadUtils.is454Read(read)) {
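The switch from a null check to read.isEmpty() reflects ReadClipper now returning an empty-read sentinel instead of null when clipping consumes the whole read. A minimal sketch of the resulting caller pattern, using the same names as the hunk above:

GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read.isEmpty())      // sentinel check replaces the old 'read == null'
    continue;            // nothing left of this read after adaptor clipping
// further clipping calls can now be chained on 'read' without null guards

The sentinel keeps the return type non-null throughout the clipping pipeline, which is the design motivation for replacing the null contract.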
@@ -106,37 +106,70 @@ public class ValidationSiteSelectorWalker extends RodWalker<Integer, Integer> {
 POLY_BASED_ON_GL
 }

+/**
+ * The input VCF file
+ */
 @Input(fullName="variant", shortName = "V", doc="Input VCF file, can be specified multiple times", required=true)
 public List<RodBinding<VariantContext>> variants;

+/**
+ * The output VCF file
+ */
 @Output(doc="File to which variants should be written",required=true)
 protected VCFWriter vcfWriter = null;

+/**
+ * Sample name(s) to subset the input VCF to, prior to selecting variants. -sn A -sn B subsets to samples A and B.
+ */
 @Argument(fullName="sample_name", shortName="sn", doc="Include genotypes from this sample. Can be specified multiple times", required=false)
 public Set<String> sampleNames = new HashSet<String>(0);

+/**
+ * Sample regexps to subset the input VCF to, prior to selecting variants. -se NA12* subsets to all samples with prefix NA12.
+ */
 @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
 public Set<String> sampleExpressions;

+/**
+ * File containing a list of sample names to subset the input vcf to. Equivalent to specifying the contents of the file separately with -sn
+ */
 @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
 public Set<File> sampleFiles;

+/**
+ * A mode for selecting sites based on sample-level data. See the wiki documentation for more information.
+ */
 @Argument(fullName="sampleMode", shortName="sampleMode", doc="Sample selection mode", required=false)
 private SAMPLE_SELECTION_MODE sampleMode = SAMPLE_SELECTION_MODE.NONE;

+/**
+ * A P[nonref] threshold for SAMPLE_SELECTION_MODE=POLY_BASED_ON_GL. See the wiki documentation for more information.
+ */
 @Argument(shortName="samplePNonref",fullName="samplePNonref", doc="GL-based selection mode only: the probability" +
 " that a site is non-reference in the samples for which to include the site",required=false)
 private double samplePNonref = 0.99;

+/**
+ * The number of sites in your validation set
+ */
 @Argument(fullName="numValidationSites", shortName="numSites", doc="Number of output validation sites", required=true)
 private int numValidationSites;

+/**
+ * Do not exclude filtered sites (e.g. not PASS or .) from consideration for validation
+ */
 @Argument(fullName="includeFilteredSites", shortName="ifs", doc="If true, will include filtered sites in set to choose variants from", required=false)
 private boolean INCLUDE_FILTERED_SITES = false;

+/**
+ * Argument for the frequency selection mode. (AC/AF/AN) are taken from the VCF info field, not recalculated. Typically specified for sites-only VCFs that still have AC/AF/AN information.
+ */
 @Argument(fullName="ignoreGenotypes", shortName="ignoreGenotypes", doc="If true, will ignore genotypes in VCF, will take AC,AF from annotations and will make no sample selection", required=false)
 private boolean IGNORE_GENOTYPES = false;

+/**
+ * Argument for the frequency selection mode. Allows reference (non-polymorphic) sites to be included in the validation set.
+ */
 @Argument(fullName="ignorePolymorphicStatus", shortName="ignorePolymorphicStatus", doc="If true, will ignore polymorphic status in VCF, and will take VCF record directly without pre-selection", required=false)
 private boolean IGNORE_POLYMORPHIC = false;
@@ -145,18 +178,13 @@ public class ValidationSiteSelectorWalker extends RodWalker<Integer, Integer> {
 private int numFrequencyBins = 20;

 /**
- * This argument selects allele frequency selection mode:
- * KEEP_AF_SPECTRUM will choose variants so that the resulting allele frequency spectrum matches as closely as possible the input set
- * UNIFORM will choose variants uniformly without regard to their allele frequency.
- *
+ * This argument selects allele frequency selection mode. See the wiki for more information.
 */
 @Argument(fullName="frequencySelectionMode", shortName="freqMode", doc="Allele Frequency selection mode", required=false)
 private AF_COMPUTATION_MODE freqMode = AF_COMPUTATION_MODE.KEEP_AF_SPECTRUM;

 /**
- * This argument selects particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
- * When specified one or more times, a particular type of variant is selected.
- *
+ * This argument selects particular kinds of variants (i.e. SNP, INDEL) out of a list. If left unspecified, all types are considered.
 */
 @Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
 private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
@@ -46,6 +46,7 @@ import java.util.*;
 public class VariantSummary extends VariantEvaluator implements StandardEval {
 final protected static Logger logger = Logger.getLogger(VariantSummary.class);

+/** Indels with size greater than this value are tallied in the CNV column */
 private final static int MAX_INDEL_LENGTH = 50;
 private final static double MIN_CNV_OVERLAP = 0.5;
 private VariantEvalWalker walker;
@@ -196,6 +197,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
 }

 private final boolean overlapsKnownCNV(VariantContext cnv) {
+if ( knownCNVs != null ) {
 final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
 IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());

@@ -205,6 +207,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
 if ( overlapP > MIN_CNV_OVERLAP )
 return true;
 }
+}

 return false;
 }
@@ -224,7 +227,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
 allVariantCounts.inc(type, ALL);

 // type specific calculations
-if ( type == Type.SNP ) {
+if ( type == Type.SNP && eval.isBiallelic() ) {
 titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
 titvTable.inc(type, ALL);
 }
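The new eval.isBiallelic() guard makes sense because the Ti/Tv tally is only well defined when a single ref-to-alt substitution exists. For reference, a minimal sketch of the transition test (a hypothetical helper written for illustration, not the VariantContextUtils.isTransition implementation):

// A transition swaps purines (A<->G) or pyrimidines (C<->T); everything else
// is a transversion. Multiallelic SNPs have several alt alleles, so no single
// ref->alt pair can be classified, hence the biallelic restriction above.
static boolean isTransition(final char ref, final char alt) {
    final String pair = "" + ref + alt;
    return pair.equals("AG") || pair.equals("GA")
        || pair.equals("CT") || pair.equals("TC");
}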
@@ -145,7 +145,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
 }

 return new GenomeLoc(getContig(), this.contigIndex,
-Math.min(getStart(), that.getStart()),
+Math.min( getStart(), that.getStart() ),
 Math.max( getStop(), that.getStop()) );
 }
@@ -465,4 +465,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
 private final static double overlapPercent(final GenomeLoc gl1, final GenomeLoc gl2) {
 return (1.0 * gl1.intersect(gl2).size()) / gl1.size();
 }
+
+public long sizeOfOverlap( final GenomeLoc that ) {
+return ( this.overlapsP(that) ? Math.min( getStop(), that.getStop() ) - Math.max( getStart(), that.getStart() ) : 0L );
+}
 }
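A quick worked example of the new sizeOfOverlap, with hypothetical coordinates. Note it returns min(stop) - max(start) without the +1 an inclusive base count would use, so callers treating GenomeLoc coordinates as inclusive get one less than the number of overlapping positions:

// this = chr1:100-200, that = chr1:150-250 (hypothetical values)
// overlapsP(that) -> true
// sizeOfOverlap   -> Math.min(200, 250) - Math.max(100, 150) = 200 - 150 = 50
// (inclusive positions 150..200 would count 51; confirm which convention you need)
// non-overlapping loci return 0L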
File diff suppressed because it is too large
@@ -0,0 +1,19 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 1/4/12
+ */
+
+public class ActiveRead {
+    final public GATKSAMRecord read;
+    final public boolean isPrimaryRegion;
+
+    ActiveRead( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
+        this.read = read;
+        this.isPrimaryRegion = isPrimaryRegion;
+    }
+}
@@ -0,0 +1,55 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.HasGenomeLocation;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.ArrayList;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 1/4/12
+ */
+
+public class ActiveRegion implements HasGenomeLocation {
+
+    private final ArrayList<ActiveRead> reads = new ArrayList<ActiveRead>();
+    private byte[] reference = null;
+    private final GenomeLoc loc;
+    private GenomeLoc referenceLoc = null;
+    private final GenomeLocParser genomeLocParser;
+    public final boolean isActive;
+
+    public ActiveRegion( final GenomeLoc loc, final boolean isActive, final GenomeLocParser genomeLocParser ) {
+        this.loc = loc;
+        this.isActive = isActive;
+        this.genomeLocParser = genomeLocParser;
+        referenceLoc = loc;
+    }
+
+    // add each read to the bin and extend the reference genome loc if needed
+    public void add( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
+        referenceLoc = referenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
+        reads.add( new ActiveRead(read, isPrimaryRegion) );
+    }
+
+    public ArrayList<ActiveRead> getReads() { return reads; }
+
+    public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
+        // set up the reference if we haven't done so yet
+        if ( reference == null ) {
+            reference = referenceReader.getSubsequenceAt(referenceLoc.getContig(), referenceLoc.getStart(), referenceLoc.getStop()).getBases();
+        }
+
+        return reference;
+    }
+
+    public GenomeLoc getLocation() { return loc; }
+
+    public GenomeLoc getReferenceLocation() { return referenceLoc; }
+
+    public int size() { return reads.size(); }
+}
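A hedged usage sketch for the new container. The loc, parser, read list, and reference reader here are assumed surrounding setup, not part of this commit:

// Bin reads into an active region and lazily fetch its (possibly widened) reference span.
ActiveRegion region = new ActiveRegion(loc, true /* isActive */, genomeLocParser);
for (final GATKSAMRecord read : readsSpanningLoc)      // hypothetical read source
    region.add(read, true /* isPrimaryRegion */);      // each add() widens referenceLoc via union()
byte[] refBases = region.getReference(referenceReader); // fetched once, then cached

Note the design choice visible in add(): the region's own location stays fixed while the reference window grows to cover every added read, so getReference() always spans the full pileup.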
@@ -70,27 +70,27 @@ public class ClippingOp {
 break;

 case SOFTCLIP_BASES:
-if ( read.getReadUnmappedFlag() ) {
+if (read.getReadUnmappedFlag()) {
 // we can't process unmapped reads
 throw new UserException("Read Clipper cannot soft clip unmapped reads");
 }

 //System.out.printf("%d %d %d%n", stop, start, read.getReadLength());
 int myStop = stop;
-if ( (stop + 1 - start) == read.getReadLength() ) {
+if ((stop + 1 - start) == read.getReadLength()) {
 // BAM representation issue -- we can't SOFTCLIP away all bases in a read, just leave it alone
 //Walker.logger.info(String.format("Warning, read %s has all bases clip but this can't be represented with SOFTCLIP_BASES, just leaving it alone", read.getReadName()));
 //break;
 myStop--; // just decrement stop
 }

-if ( start > 0 && myStop != read.getReadLength() - 1 )
+if (start > 0 && myStop != read.getReadLength() - 1)
 throw new RuntimeException(String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", read.getReadName(), start, myStop));

 Cigar oldCigar = read.getCigar();

 int scLeft = 0, scRight = read.getReadLength();
-if ( start == 0 )
+if (start == 0)
 scLeft = myStop + 1;
 else
 scRight = start;

@@ -134,8 +134,7 @@ public class ClippingOp {
 unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH));
 matchesCount = 0;
 unclippedCigar.add(element);
-}
-else
+} else
 unclippedCigar.add(element);
 }
 if (matchesCount > 0)

@@ -284,10 +283,9 @@ public class ClippingOp {
 }

 @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
-private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
+private GATKSAMRecord hardClip(GATKSAMRecord read, int start, int stop) {
 if (start == 0 && stop == read.getReadLength() - 1)
 return GATKSAMRecord.emptyRead(read);
-// return new GATKSAMRecord(read.getHeader());

 // If the read is unmapped there is no Cigar string and neither should we create a new cigar string
@@ -296,8 +294,8 @@ public class ClippingOp {
 // the cigar may force a shift left or right (or both) in case we are left with insertions
 // starting or ending the read after applying the hard clip on start/stop.
 int newLength = read.getReadLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
-byte [] newBases = new byte[newLength];
-byte [] newQuals = new byte[newLength];
+byte[] newBases = new byte[newLength];
+byte[] newQuals = new byte[newLength];
 int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;

 System.arraycopy(read.getReadBases(), copyStart, newBases, 0, newLength);
@@ -321,11 +319,11 @@ public class ClippingOp {
 }

 @Requires({"!cigar.isEmpty()"})
-private CigarShift hardClipCigar (Cigar cigar, int start, int stop) {
+private CigarShift hardClipCigar(Cigar cigar, int start, int stop) {
 Cigar newCigar = new Cigar();
 int index = 0;
 int totalHardClipCount = stop - start + 1;
-int alignmentShift = 0; // caused by hard clipping insertions or deletions
+int alignmentShift = 0; // caused by hard clipping deletions

 // hard clip the beginning of the cigar string
 if (start == 0) {

@@ -353,7 +351,7 @@ public class ClippingOp {
 // element goes beyond what we need to clip
 else if (index + shift > stop + 1) {
 int elementLengthAfterChopping = cigarElement.getLength() - (stop - index + 1);
-alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop-index+1);
+alignmentShift += calculateHardClippingAlignmentShift(cigarElement, stop - index + 1);
 newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
 newCigar.add(new CigarElement(elementLengthAfterChopping, cigarElement.getOperator()));
 }

@@ -408,7 +406,7 @@ public class ClippingOp {
 }

 // check if we are hard clipping indels
-while(cigarElementIterator.hasNext()) {
+while (cigarElementIterator.hasNext()) {
 cigarElement = cigarElementIterator.next();
 alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
@@ -444,34 +442,30 @@ public class ClippingOp {
 boolean readHasStarted = false;
 boolean addedHardClips = false;

-while(!cigarStack.empty()) {
+while (!cigarStack.empty()) {
 CigarElement cigarElement = cigarStack.pop();

-if ( !readHasStarted &&
-cigarElement.getOperator() != CigarOperator.INSERTION &&
+if (!readHasStarted &&
+// cigarElement.getOperator() != CigarOperator.INSERTION &&
 cigarElement.getOperator() != CigarOperator.DELETION &&
 cigarElement.getOperator() != CigarOperator.HARD_CLIP)
 readHasStarted = true;

-else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
+else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
 totalHardClip += cigarElement.getLength();

-else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
-shift += cigarElement.getLength();
-
-else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
+else if (!readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
 totalHardClip += cigarElement.getLength();

 if (readHasStarted) {
-if (i==1) {
+if (i == 1) {
 if (!addedHardClips) {
 if (totalHardClip > 0)
 inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
 addedHardClips = true;
 }
 inverseCigarStack.push(cigarElement);
-}
-else {
+} else {
 if (!addedHardClips) {
 if (totalHardClip > 0)
 cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));

@@ -509,7 +503,7 @@ public class ClippingOp {
 }

 for (CigarElement cigarElement : oldCigar.getCigarElements()) {
-if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
+if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
 oldShift += cigarElement.getLength();
 else if (readHasStarted)
 break;
@@ -374,24 +374,43 @@ public class ReadClipper {
 * Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
 * and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
 *
+* Note, it REQUIRES you to give the directionality of your hard clip (i.e. whether you're clipping the
+* left or right tail) by specifying either refStart < 0 or refStop < 0.
+*
 * @param refStart first base to clip (inclusive)
 * @param refStop last base to clip (inclusive)
 * @return a new read, without the clipped bases
 */
-@Requires("!read.getReadUnmappedFlag()") // can't handle unmapped reads, as we're using reference coordinates to clip
+@Requires({"!read.getReadUnmappedFlag()", "refStart < 0 || refStop < 0"}) // can't handle unmapped reads, as we're using reference coordinates to clip
 protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
-int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
-int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
-
-if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
-return GATKSAMRecord.emptyRead(read);
-// return new GATKSAMRecord(read.getHeader());
+if (read.isEmpty())
+return read;
+
+int start;
+int stop;
+
+// Determine the read coordinate to start and stop hard clipping
+if (refStart < 0) {
+if (refStop < 0)
+throw new ReviewedStingException("Only one of refStart or refStop must be < 0, not both (" + refStart + ", " + refStop + ")");
+start = 0;
+stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
+}
+else {
+if (refStop >= 0)
+throw new ReviewedStingException("Either refStart or refStop must be < 0 (" + refStart + ", " + refStop + ")");
+start = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
+stop = read.getReadLength() - 1;
+}
+
+// if ((start == 0 && stop == read.getReadLength() - 1))
+// return GATKSAMRecord.emptyRead(read);

 if (start < 0 || stop > read.getReadLength() - 1)
 throw new ReviewedStingException("Trying to clip before the start or after the end of a read");

 if ( start > stop )
-throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
+throw new ReviewedStingException(String.format("START (%d) > (%d) STOP -- this should never happen -- call Mauricio!", start, stop));

 if ( start > 0 && stop < read.getReadLength() - 1)
 throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
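The rewritten method now encodes clip directionality in the sign of its arguments. An illustrative call pattern follows; the 'clipper' receiver and the coordinates are hypothetical, and in practice callers go through hardClipByReferenceCoordinatesLeftTail and hardClipByReferenceCoordinatesRightTail, which pass the negative sentinel for you:

GATKSAMRecord leftClipped  = clipper.hardClipByReferenceCoordinates(-1, 1050);  // refStart < 0: clip the left tail through ref position 1050
GATKSAMRecord rightClipped = clipper.hardClipByReferenceCoordinates(1200, -1);  // refStop < 0: clip the right tail starting at ref position 1200
// Passing two non-negative (or two negative) coordinates now throws ReviewedStingException,
// and an empty input read is returned unchanged rather than re-wrapped.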
@@ -526,6 +526,42 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
 }
 }

+    /**
+     * Gets the pileup for a set of read groups. Horrendously inefficient at this point.
+     * @param rgSet Set of identifiers for the read groups.
+     * @return A read-backed pileup containing only the reads in the given read groups.
+     */
+    @Override
+    public RBP getPileupForReadGroups(final HashSet<String> rgSet) {
+        if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
+            PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
+            PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
+
+            for(final String sample: tracker.getSamples()) {
+                PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
+                AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroups(rgSet);
+                if(pileup != null)
+                    filteredTracker.addElements(sample,pileup.pileupElementTracker);
+            }
+            return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
+        }
+        else {
+            UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
+            for(PE p: pileupElementTracker) {
+                GATKSAMRecord read = p.getRead();
+                if(rgSet != null && !rgSet.isEmpty()) {
+                    if(read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId()))
+                        filteredTracker.add(p);
+                }
+                else {
+                    if(read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
+                        filteredTracker.add(p);
+                }
+            }
+            return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
+        }
+    }
+
 @Override
 public RBP getPileupForLane(String laneID) {
 if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
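A hedged usage sketch for the new overload; the pileup variable, the process() helper, and the read-group ids are hypothetical:

HashSet<String> rgSet = new HashSet<String>(java.util.Arrays.asList("20GAV.1", "20GAV.2"));
ReadBackedPileup filtered = pileup.getPileupForReadGroups(rgSet);
if (filtered != null)        // null is returned when no pileup element matches
    process(filtered);       // placeholder for downstream use

Note the fallback branch in the implementation above: when rgSet is null or empty, only reads lacking a read group (or a read-group id) are kept, so an empty set is not a no-op filter.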
@@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.fragments.FragmentCollection;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

 import java.util.Collection;
+import java.util.HashSet;
 import java.util.List;

 /**

@@ -129,6 +130,13 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
 */
 public ReadBackedPileup getPileupForReadGroup(String readGroupId);

+/**
+ * Gets all the reads associated with the given read groups.
+ * @param rgSet Set of identifiers for the read groups.
+ * @return A pileup containing only the reads in the given read groups.
+ */
+public ReadBackedPileup getPileupForReadGroups(final HashSet<String> rgSet);
+
 /**
 * Gets all reads in a given lane id. (Lane ID is the read group
 * id stripped of the last .XX sample identifier added by the GATK).
@@ -238,7 +238,7 @@ public class ArtificialSAMUtils {
 */
 public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
 SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
+return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 10000, bases, qual, cigar);
 }
@@ -24,7 +24,6 @@
 package org.broadinstitute.sting.utils.sam;

-import com.google.java.contract.Ensures;
 import net.sf.samtools.*;
 import org.broadinstitute.sting.utils.NGSPlatform;

@@ -184,6 +183,12 @@ public class GATKSAMRecord extends BAMRecord {
 return getReducedReadCounts() != null;
 }

+/**
+ * The number of bases corresponding to the i'th base of the reduced read.
+ *
+ * @param i the read based coordinate inside the read
+ * @return the number of bases corresponding to the i'th base of the reduced read
+ */
 public final byte getReducedCount(final int i) {
 byte firstCount = getReducedReadCounts()[0];
 byte offsetCount = getReducedReadCounts()[i];
@@ -277,7 +282,6 @@ public class GATKSAMRecord extends BAMRecord {
 *
 * @return the unclipped start of the read taking soft clips (but not hard clips) into account
 */
-@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
 public int getSoftStart() {
 int start = this.getUnclippedStart();
 for (CigarElement cigarElement : this.getCigar().getCigarElements()) {

@@ -286,17 +290,17 @@ public class GATKSAMRecord extends BAMRecord {
 else
 break;
 }

 return start;
 }

 /**
 * Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips.
 *
-* Note: getUnclippedStart() adds soft and hard clips, this function only adds soft clips.
+* Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips.
 *
 * @return the unclipped end of the read taking soft clips (but not hard clips) into account
 */
-@Ensures({"result >= getUnclippedStart()", "result <= getUnclippedEnd() || ReadUtils.readIsEntirelyInsertion(this)"})
 public int getSoftEnd() {
 int stop = this.getUnclippedStart();
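The corrected note distinguishes getUnclippedEnd() from this method: only trailing soft clips are added back, never hard clips. A simplified sketch of that idea, assuming a SAMRecord-style API (the actual implementation walks the CIGAR differently, so this is the concept rather than the method body):

// Start from the reference coordinate of the last aligned base and add back
// only trailing SOFT_CLIP lengths; HARD_CLIP elements carry no bases.
int softEnd = read.getAlignmentEnd();
final java.util.List<CigarElement> elems = read.getCigar().getCigarElements();
for (int i = elems.size() - 1; i >= 0; i--) {
    final CigarOperator op = elems.get(i).getOperator();
    if (op == CigarOperator.HARD_CLIP)
        continue;                                   // skip trailing hard clips
    if (op == CigarOperator.SOFT_CLIP)
        softEnd += elems.get(i).getLength();        // soft-clipped bases extend the end
    else
        break;                                      // reached the aligned portion
}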
@@ -313,6 +317,7 @@ public class GATKSAMRecord extends BAMRecord {
 else
 shift = 0;
 }
+
 return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
 }
@@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import net.sf.samtools.*;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

@@ -58,7 +59,7 @@ public class ReadUtils {

 /**
 * A HashMap of the SAM spec read flag names
-* <p/>
+*
 * Note: This is not being used right now, but can be useful in the future
 */
 private static final Map<Integer, String> readFlagNames = new HashMap<Integer, String>();
@@ -79,49 +80,47 @@ public class ReadUtils {

 /**
 * This enum represents all the different ways in which a read can overlap an interval.
-* <p/>
+*
 * NO_OVERLAP_CONTIG:
 * read and interval are in different contigs.
-* <p/>
+*
 * NO_OVERLAP_LEFT:
 * the read does not overlap the interval.
-* <p/>
+*
 * |----------------| (interval)
 * <----------------> (read)
-* <p/>
+*
 * NO_OVERLAP_RIGHT:
 * the read does not overlap the interval.
-* <p/>
+*
 * |----------------| (interval)
 * <----------------> (read)
-* <p/>
+*
 * OVERLAP_LEFT:
 * the read starts before the beginning of the interval but ends inside of it
-* <p/>
+*
 * |----------------| (interval)
 * <----------------> (read)
-* <p/>
+*
 * OVERLAP_RIGHT:
 * the read starts inside the interval but ends outside of it
-* <p/>
+*
 * |----------------| (interval)
 * <----------------> (read)
-* <p/>
+*
 * OVERLAP_LEFT_AND_RIGHT:
 * the read starts before the interval and ends after the interval
-* <p/>
+*
 * |-----------| (interval)
 * <-------------------> (read)
-* <p/>
+*
 * OVERLAP_CONTAINED:
 * the read starts and ends inside the interval
-* <p/>
+*
 * |----------------| (interval)
 * <--------> (read)
 */
-public enum ReadAndIntervalOverlap {
-NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED
-}
+public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}

 /**
 * Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
@@ -141,12 +140,12 @@ public class ReadUtils {

 /**
 * is this base inside the adaptor of the read?
-* <p/>
+*
 * There are two cases to treat here:
-* <p/>
+*
 * 1) Read is in the negative strand => Adaptor boundary is on the left tail
 * 2) Read is in the positive strand => Adaptor boundary is on the right tail
-* <p/>
+*
 * Note: We return false for all reads that are UNMAPPED or have a weird big insert size (probably due to mismapping or a bigger event)
 *
 * @param read the read to test
@@ -166,21 +165,21 @@ public class ReadUtils {
 * the read boundary. If the read is in the positive strand, this is the first base after the end of the
 * fragment (Picard calls it 'insert'), if the read is in the negative strand, this is the first base before the
 * beginning of the fragment.
-* <p/>
+*
 * There are two cases we need to treat here:
-* <p/>
+*
 * 1) Our read is in the reverse strand :
-* <p/>
+*
 * <----------------------| *
 * |--------------------->
-* <p/>
+*
 * in these cases, the adaptor boundary is at the mate start (minus one)
-* <p/>
+*
 * 2) Our read is in the forward strand :
-* <p/>
+*
 * |----------------------> *
 * <----------------------|
-* <p/>
+*
 * in these cases the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
 *
 * @param read the read being tested for the adaptor boundary
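A worked sketch of the two documented cases; this mirrors the javadoc rules above rather than the method body, and 'read' is a hypothetical mapped, paired read:

int adaptorBoundary;
if (read.getReadNegativeStrandFlag())
    // case 1: reverse strand, boundary is one base before the mate's start
    adaptorBoundary = read.getMateAlignmentStart() - 1;
else
    // case 2: forward strand, boundary is one base past the fragment end
    adaptorBoundary = read.getAlignmentStart() + read.getInferredInsertSize() + 1;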
@@ -264,7 +263,7 @@ public class ReadUtils {
    /**
     * If a read starts in INSERTION, returns the first element length.
-    * <p/>
+    *
     * Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
     *
     * @param read

@@ -272,7 +271,7 @@ public class ReadUtils {
     */
    public final static int getFirstInsertionOffset(SAMRecord read) {
        CigarElement e = read.getCigar().getCigarElement(0);
-       if (e.getOperator() == CigarOperator.I)
+       if ( e.getOperator() == CigarOperator.I )
            return e.getLength();
        else
            return 0;

@@ -280,7 +279,7 @@ public class ReadUtils {
    /**
     * If a read ends in INSERTION, returns the last element length.
-    * <p/>
+    *
     * Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
     *
     * @param read

@@ -288,7 +287,7 @@ public class ReadUtils {
     */
    public final static int getLastInsertionOffset(SAMRecord read) {
        CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
-       if (e.getOperator() == CigarOperator.I)
+       if ( e.getOperator() == CigarOperator.I )
            return e.getLength();
        else
            return 0;
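The clip warning above matters in practice: a leading soft clip hides the insertion from these helpers. A small hedged illustration (it assumes a SAMFileHeader named header is in scope; only the cigar is consulted, so no bases are needed):

    SAMRecord read = new SAMRecord(header);               // 'header' is assumed available for this sketch
    read.setCigarString("3I97M");
    assert ReadUtils.getFirstInsertionOffset(read) == 3;  // leading insertion is reported
    read.setCigarString("2S3I95M");
    assert ReadUtils.getFirstInsertionOffset(read) == 0;  // the soft clip comes first, so the result is 0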
@@ -297,7 +296,6 @@ public class ReadUtils {
    /**
     * Determines the position of the read relative to the interval.
     * Note: This function uses the UNCLIPPED ENDS of the reads for the comparison.
-    *
     * @param read the read
     * @param interval the interval
     * @return the overlap type as described by ReadAndIntervalOverlap enum (see above)

@@ -309,30 +307,30 @@ public class ReadUtils {
        int uStart = read.getUnclippedStart();
        int uStop = read.getUnclippedEnd();

-       if (!read.getReferenceName().equals(interval.getContig()))
+       if ( !read.getReferenceName().equals(interval.getContig()) )
            return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;

-       else if (uStop < interval.getStart())
+       else if ( uStop < interval.getStart() )
            return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;

-       else if (uStart > interval.getStop())
+       else if ( uStart > interval.getStop() )
            return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;

-       else if (sStop < interval.getStart())
+       else if ( sStop < interval.getStart() )
            return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;

-       else if (sStart > interval.getStop())
+       else if ( sStart > interval.getStop() )
            return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;

-       else if ((sStart >= interval.getStart()) &&
-                (sStop <= interval.getStop()))
+       else if ( (sStart >= interval.getStart()) &&
+                 (sStop <= interval.getStop()) )
            return ReadAndIntervalOverlap.OVERLAP_CONTAINED;

-       else if ((sStart < interval.getStart()) &&
-                (sStop > interval.getStop()))
+       else if ( (sStart < interval.getStart()) &&
+                 (sStop > interval.getStop()) )
            return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;

-       else if ((sStart < interval.getStart()))
+       else if ( (sStart < interval.getStart()) )
            return ReadAndIntervalOverlap.OVERLAP_LEFT;

        else
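To make the classification concrete, a hedged example with invented coordinates: take a read aligned at 100-150 with 10 soft-clipped bases on the right, so sStart/sStop = 100/150 and uStart/uStop = 100/160.

    // interval 155-158 -> NO_OVERLAP_HARDCLIPPED_LEFT (only the clipped tail reaches the interval)
    // interval 161-170 -> NO_OVERLAP_LEFT             (even the unclipped end stops short of it)
    // interval 120-130 -> OVERLAP_LEFT_AND_RIGHT      (aligned bases extend past both interval ends)
    // interval  90-200 -> OVERLAP_CONTAINED           (aligned bases lie entirely inside the interval)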
@@ -340,36 +338,52 @@ public class ReadUtils {
    }

    /**
-    * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) in case it falls in
-    * a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
-    * the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
-    * deletion.
+    * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) to take care of
+    * two corner cases:
+    *
+    * 1. If clipping the right tail (end of the read) and getReadCoordinateForReferenceCoordinate falls inside
+    *    a deletion, return the base after the deletion. If clipping the left tail (beginning of the read) it
+    *    doesn't matter, because it already returns the previous base by default.
+    *
+    * 2. If clipping the left tail (beginning of the read), the read starts with an insertion, and you're
+    *    requesting the first read-based coordinate, getReadCoordinateForReferenceCoordinate will skip
+    *    the leading insertion (because it has the same reference coordinate as the following base).
     *
     * @param read
     * @param refCoord
     * @param tail
     * @return the read coordinate corresponding to the requested reference coordinate for clipping.
     */
-   @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
+   @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"})
    @Ensures({"result >= 0", "result < read.getReadLength()"})
    public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) {
        Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
        int readCoord = result.getFirst();

+       // Corner case one: clipping the right tail and the coordinate falls on a deletion -- move to the next
+       // read coordinate. It is not a problem for the left tail because the default answer
+       // from getReadCoordinateForReferenceCoordinate is to give the previous read coordinate.
        if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
            readCoord++;

+       // Corner case two: clipping the left tail and the first base is an insertion -- go to the next read
+       // coordinate with the same reference coordinate. Advance to the next cigar element, or to the
+       // end of the read if there is no next element.
+       Pair<Boolean, CigarElement> firstElementIsInsertion = readStartsWithInsertion(read);
+       if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst())
+           readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), read.getReadLength() - 1);

        return readCoord;
    }
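A hedged usage sketch of the pre-processed variant (the read and coordinates are invented for illustration):

    // assume 'read' has cigar 10M2D10M, so read bases 0-9 map to the first ten reference positions
    int refCoord = read.getAlignmentStart() + 10;   // the first deleted reference base
    int left  = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refCoord, ClippingTail.LEFT_TAIL);   // 9: last base before the deletion
    int right = ReadUtils.getReadCoordinateForReferenceCoordinate(read, refCoord, ClippingTail.RIGHT_TAIL);  // 10: first base after the deletion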
    /**
     * Returns the read coordinate corresponding to the requested reference coordinate.
-    * <p/>
+    *
     * WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
     * will return the last read base before the deletion. This function returns a
     * Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
     * a deletion.
-    * <p/>
+    *
     * SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a
     * pre-processed result according to normal clipping needs. Or you can use this function and tailor the
     * behavior to your needs.

@@ -457,7 +471,6 @@ public class ReadUtils {
        if (!goalReached)
            throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");

-
        return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
    }

@@ -465,12 +478,11 @@ public class ReadUtils {
     * Compares two SAMRecords only on the basis of alignment start. Note that
     * comparisons are performed ONLY on the basis of alignment start; any
     * two SAM records with the same alignment start will be considered equal.
-    * <p/>
+    *
     * Unmapped alignments will all be considered equal.
     */

    @Requires({"read1 != null", "read2 != null"})
-   @Ensures("result == 0 || result == 1 || result == -1")
    public static int compareSAMRecords(GATKSAMRecord read1, GATKSAMRecord read2) {
        AlignmentStartComparator comp = new AlignmentStartComparator();
        return comp.compare(read1, read2);
@@ -502,4 +514,134 @@ public class ReadUtils {
    }

+   /**
+    * Checks if a read starts with an insertion. It looks beyond Hard and Soft clips
+    * if there are any.
+    *
+    * @param read
+    * @return A pair with the answer (true/false) and the element, or null if it doesn't exist
+    */
+   public static Pair<Boolean, CigarElement> readStartsWithInsertion(GATKSAMRecord read) {
+       for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
+           if (cigarElement.getOperator() == CigarOperator.INSERTION)
+               return new Pair<Boolean, CigarElement>(true, cigarElement);
+
+           else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP)
+               break;
+       }
+       return new Pair<Boolean, CigarElement>(false, null);
+   }
+
+   /**
+    * Returns the coverage distribution of a list of reads within the desired region.
+    *
+    * See getCoverageDistributionOfRead for information on how the coverage is calculated.
+    *
+    * @param list          the list of reads covering the region
+    * @param startLocation the first reference coordinate of the region (inclusive)
+    * @param stopLocation  the last reference coordinate of the region (inclusive)
+    * @return an array with the coverage of each position from startLocation to stopLocation
+    */
+   public static int[] getCoverageDistributionOfReads(List<GATKSAMRecord> list, int startLocation, int stopLocation) {
+       int[] totalCoverage = new int[stopLocation - startLocation + 1];
+
+       for (GATKSAMRecord read : list) {
+           int[] readCoverage = getCoverageDistributionOfRead(read, startLocation, stopLocation);
+           totalCoverage = MathUtils.addArrays(totalCoverage, readCoverage);
+       }
+
+       return totalCoverage;
+   }
+   /**
+    * Returns the coverage distribution of a single read within the desired region.
+    *
+    * Note: This function counts DELETIONS as coverage (since the main purpose is to downsample
+    * reads for variant regions, and deletions count as variants)
+    *
+    * @param read          the read to get the coverage distribution of
+    * @param startLocation the first reference coordinate of the region (inclusive)
+    * @param stopLocation  the last reference coordinate of the region (inclusive)
+    * @return an array with the coverage of each position from startLocation to stopLocation
+    */
+   public static int[] getCoverageDistributionOfRead(GATKSAMRecord read, int startLocation, int stopLocation) {
+       int[] coverage = new int[stopLocation - startLocation + 1];
+       int refLocation = read.getSoftStart();
+       for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
+           switch (cigarElement.getOperator()) {
+               case S:
+               case M:
+               case EQ:
+               case N:
+               case X:
+               case D:
+                   for (int i = 0; i < cigarElement.getLength(); i++) {
+                       if (refLocation >= startLocation && refLocation <= stopLocation) {
+                           int baseCount = read.isReducedRead() ? read.getReducedCount(refLocation - read.getSoftStart()) : 1;
+                           coverage[refLocation - startLocation] += baseCount;   // this may be a reduced read, so add the proper number of bases
+                       }
+                       refLocation++;
+                   }
+                   break;
+
+               case P:
+               case I:
+               case H:
+                   break;
+           }
+
+           if (refLocation > stopLocation)
+               break;
+       }
+       return coverage;
+   }
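A hedged usage sketch of the two coverage helpers; the read list and window are invented for illustration:

    // 'reads' is assumed to hold the GATKSAMRecords overlapping the window [10000, 10099]
    int[] cov = ReadUtils.getCoverageDistributionOfReads(reads, 10000, 10099);
    for (int i = 0; i < cov.length; i++)
        if (cov[i] > 200)    // arbitrary threshold, purely for illustration
            System.out.println("position " + (10000 + i) + " has coverage " + cov[i]);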
+   /**
+    * Makes association maps for the reads and loci coverage as described below:
+    *
+    * - First: locusToReadMap -- a HashMap that describes for each locus, which reads contribute to its coverage.
+    *     Note: Locus is in reference coordinates.
+    *     Example: Locus => {read1, read2, ..., readN}
+    *
+    * - Second: readToLocusMap -- a HashMap that describes for each read what loci it contributes to the coverage.
+    *     Note: Locus is a boolean array, indexed from 0 (= startLocation) to N (= stopLocation), with true meaning it contributes to the coverage.
+    *     Example: Read => {true, true, false, ..., false}
+    *
+    * @param readList      the list of reads to generate the association mappings
+    * @param startLocation the first reference coordinate of the region (inclusive)
+    * @param stopLocation  the last reference coordinate of the region (inclusive)
+    * @return the two hashmaps described above
+    */
+   public static Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>> getBothReadToLociMappings(List<GATKSAMRecord> readList, int startLocation, int stopLocation) {
+       int arraySize = stopLocation - startLocation + 1;
+
+       HashMap<Integer, HashSet<GATKSAMRecord>> locusToReadMap = new HashMap<Integer, HashSet<GATKSAMRecord>>(2 * (stopLocation - startLocation + 1), 0.5f);
+       HashMap<GATKSAMRecord, Boolean[]> readToLocusMap = new HashMap<GATKSAMRecord, Boolean[]>(2 * readList.size(), 0.5f);
+
+       for (int i = startLocation; i <= stopLocation; i++)
+           locusToReadMap.put(i, new HashSet<GATKSAMRecord>());       // initialize the locusToRead map with empty sets
+
+       for (GATKSAMRecord read : readList) {
+           readToLocusMap.put(read, new Boolean[arraySize]);          // initialize the readToLocus map with empty arrays
+
+           int[] readCoverage = getCoverageDistributionOfRead(read, startLocation, stopLocation);
+
+           for (int i = 0; i < readCoverage.length; i++) {
+               int refLocation = i + startLocation;
+               if (readCoverage[i] > 0) {
+                   // update the hash for this locus
+                   HashSet<GATKSAMRecord> readSet = locusToReadMap.get(refLocation);
+                   readSet.add(read);
+
+                   // add this locus to the read hash
+                   readToLocusMap.get(read)[refLocation - startLocation] = true;
+               }
+               else
+                   // update the boolean array with a 'no coverage' from this read to this locus
+                   readToLocusMap.get(read)[refLocation - startLocation] = false;
+           }
+       }
+       return new Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>>(locusToReadMap, readToLocusMap);
+   }
 }
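And a matching hedged sketch for the association maps (same assumed 'reads' list):

    Pair<HashMap<Integer, HashSet<GATKSAMRecord>>, HashMap<GATKSAMRecord, Boolean[]>> maps =
            ReadUtils.getBothReadToLociMappings(reads, 10000, 10009);
    HashSet<GATKSAMRecord> coveringReads = maps.getFirst().get(10003);  // which reads cover locus 10003?
    Boolean[] coveredLoci = maps.getSecond().get(reads.get(0));         // which window positions does read 0 cover?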
@@ -145,19 +145,19 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
        }
    }

-   @Test(enabled = false)
+   @Test
    public void testSnpEffAnnotations() {
        WalkerTestSpec spec = new WalkerTestSpec(
                "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
                validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
-               "snpEff2.0.4.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
+               "snpEff2.0.5.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
                1,
-               Arrays.asList("51258f5c880bd1ca3eb45a1711335c66")
+               Arrays.asList("ffbda45b3682c9b83cb541d83f6c15d6")
        );
        executeTest("Testing SnpEff annotations", spec);
    }

-   @Test(enabled = false)
+   @Test
    public void testSnpEffAnnotationsUnsupportedVersion() {
        WalkerTestSpec spec = new WalkerTestSpec(
                "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
@@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testOutputParameter() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
-       e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "42e4ea7878ef8d96215accb3ba4e97b7" );
-       e.put( "--output_mode EMIT_ALL_SITES", "e0443c720149647469f2a2f3fb73942f" );
+       e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "553f6b4cbf380885bec9dd634cf68742" );
+       e.put( "--output_mode EMIT_ALL_SITES", "6d8624e45ad9dae5803ac705b39e4ffa" );

        for ( Map.Entry<String, String> entry : e.entrySet() ) {
            WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(

@@ -265,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
                "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-               Arrays.asList("fa4f3ee67d98b64102a8a3ec81a3bc81"));
+               Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
        executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
    }

@@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
                + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
                "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-               Arrays.asList("df90890e43d735573a3b3e4f289ca46b"));
+               Arrays.asList("36ce53ae4319718ad9c8ae391deebc8c"));
        executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
    }

@@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
                baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
                "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
-               Arrays.asList("cff6dd0f4eb1ef0b6fc476da6ffead19"));
+               Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0"));
        executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
    }

@@ -294,11 +294,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
        WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
                baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
                "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
-               Arrays.asList("1e2a4aab26e9ab0dae709d33a669e036"));
+               Arrays.asList("947c12ef2a8c29ae787cd11be8c565c8"));
        executeTest("test MultiSample Phase1 indels with complicated records", spec4);
    }

-   @Test(enabled = false)
+   @Test
    public void testSnpEffAnnotationRequestedWithoutRodBinding() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " +
@@ -118,6 +118,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
                    Arrays.asList(md5));
            executeTest("testTableRecalibrator1", spec);
        }
+       else {
+           throw new IllegalStateException("testTableRecalibrator1: paramsFile was null");
+       }
    }

    @Test

@@ -144,7 +147,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
        }
    }

-   @Test
+   @Test(dependsOnMethods = "testCountCovariates1")
    public void testTableRecalibratorMaxQ70() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0b7123ae9f4155484b68e4a4f96c5504" );

@@ -170,6 +173,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
                    Arrays.asList(md5));
            executeTest("testTableRecalibratorMaxQ70", spec);
        }
+       else {
+           throw new IllegalStateException("testTableRecalibratorMaxQ70: paramsFile was null");
+       }
    }
}

@@ -199,7 +205,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
        }
    }

-   @Test
+   @Test(dependsOnMethods = "testCountCovariatesSolidIndelsRemoveRefBias")
    public void testTableRecalibratorSolidIndelsRemoveRefBias() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "2ad4c17ac3ed380071137e4e53a398a5" );

@@ -224,6 +230,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
                    Arrays.asList(md5));
            executeTest("testTableRecalibratorSolidIndelsRemoveRefBias", spec);
        }
+       else {
+           throw new IllegalStateException("testTableRecalibratorSolidIndelsRemoveRefBias: paramsFile was null");
+       }
    }
}

@@ -305,7 +314,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
        }
    }

-   @Test
+   @Test(dependsOnMethods = "testCountCovariatesNoIndex")
    public void testTableRecalibratorNoIndex() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "991f093a0e610df235d28ada418ebf33" );

@@ -329,6 +338,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
                    Arrays.asList(md5));
            executeTest("testTableRecalibratorNoIndex", spec);
        }
+       else {
+           throw new IllegalStateException("testTableRecalibratorNoIndex: paramsFile was null");
+       }
    }
}
@@ -14,19 +14,19 @@ public class VariantEvalIntegrationTest extends WalkerTest {
    private static String cmdRoot = "-T VariantEval" +
            " -R " + b36KGReference;

-   @Test(enabled = false)
+   @Test
    public void testFunctionClassWithSnpeff() {
        WalkerTestSpec spec = new WalkerTestSpec(
                buildCommandLine(
                        "-T VariantEval",
                        "-R " + b37KGReference,
                        "--dbsnp " + b37dbSNP132,
-                       "--eval " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
+                       "--eval " + validationDataLocation + "snpEff2.0.5.AFR.unfiltered.VariantAnnotator.output.vcf",
                        "-noEV",
                        "-EV TiTvVariantEvaluator",
                        "-noST",
                        "-ST FunctionalClass",
-                       "-L " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
+                       "-L " + validationDataLocation + "snpEff2.0.5.AFR.unfiltered.VariantAnnotator.output.vcf",
                        "-o %s"
                ),
                1,

@@ -450,4 +450,21 @@ public class VariantEvalIntegrationTest extends WalkerTest {
        );
        executeTest("testIntervalStrat", spec);
    }

+   @Test
+   public void testModernVCFWithLargeIndels() {
+       WalkerTestSpec spec = new WalkerTestSpec(
+               buildCommandLine(
+                       "-T VariantEval",
+                       "-R " + b37KGReference,
+                       "-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
+                       "-L 20",
+                       "-D " + b37dbSNP132,
+                       "-o %s"
+               ),
+               1,
+               Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
+       );
+       executeTest("testModernVCFWithLargeIndels", spec);
+   }
}
@@ -26,22 +26,20 @@
package org.broadinstitute.sting.utils;

+import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
-import org.broadinstitute.sting.BaseTest;

-import java.util.List;
-import java.util.ArrayList;
-import java.util.Collections;
+import java.util.*;

/**
 * Basic unit test for MathUtils
 */
public class MathUtilsUnitTest extends BaseTest {
    @BeforeClass
-   public void init() { }
+   public void init() {
+   }

    /**
     * Tests that we get the right values from the binomial distribution
@@ -66,20 +64,20 @@ public class MathUtilsUnitTest extends BaseTest {
    public void testMultinomialProbability() {
        logger.warn("Executing testMultinomialProbability");

-       int[] counts0 = { 2, 0, 1 };
-       double[] probs0 = { 0.33, 0.33, 0.34 };
+       int[] counts0 = {2, 0, 1};
+       double[] probs0 = {0.33, 0.33, 0.34};
        Assert.assertEquals(MathUtils.multinomialProbability(counts0, probs0), 0.111078, 1e-6);

-       int[] counts1 = { 10, 20, 30 };
-       double[] probs1 = { 0.25, 0.25, 0.50 };
+       int[] counts1 = {10, 20, 30};
+       double[] probs1 = {0.25, 0.25, 0.50};
        Assert.assertEquals(MathUtils.multinomialProbability(counts1, probs1), 0.002870301, 1e-9);

-       int[] counts2 = { 38, 82, 50, 36 };
-       double[] probs2 = { 0.25, 0.25, 0.25, 0.25 };
+       int[] counts2 = {38, 82, 50, 36};
+       double[] probs2 = {0.25, 0.25, 0.25, 0.25};
        Assert.assertEquals(MathUtils.multinomialProbability(counts2, probs2), 1.88221e-09, 1e-10);

-       int[] counts3 = { 1, 600, 1 };
-       double[] probs3 = { 0.33, 0.33, 0.34 };
+       int[] counts3 = {1, 600, 1};
+       double[] probs3 = {0.33, 0.33, 0.34};
        Assert.assertEquals(MathUtils.multinomialProbability(counts3, probs3), 5.20988e-285, 1e-286);
    }
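For reference, multinomialProbability computes the standard multinomial mass function, and the first case above checks exactly

    P(k_1,\dots,k_m) = \frac{n!}{k_1!\cdots k_m!}\prod_{i=1}^{m} p_i^{k_i},
    \qquad
    \frac{3!}{2!\,0!\,1!}\,(0.33)^2(0.34) = 3 \times 0.1089 \times 0.34 = 0.111078 .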
@@ -123,19 +121,21 @@ public class MathUtilsUnitTest extends BaseTest {
        Assert.assertTrue(FiveAlpha.containsAll(BigFiveAlpha));
    }

-   /** Tests that we correctly compute mean and standard deviation from a stream of numbers */
+   /**
+    * Tests that we correctly compute mean and standard deviation from a stream of numbers
+    */
    @Test
    public void testRunningAverage() {
        logger.warn("Executing testRunningAverage");

-       int [] numbers = {1,2,4,5,3,128,25678,-24};
+       int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
        MathUtils.RunningAverage r = new MathUtils.RunningAverage();

-       for ( int i = 0 ; i < numbers.length ; i++ ) r.add((double)numbers[i]);
+       for (int i = 0; i < numbers.length; i++) r.add((double) numbers[i]);

-       Assert.assertEquals((long)numbers.length, r.observationCount());
-       Assert.assertTrue(r.mean()- 3224.625 < 2e-10 );
-       Assert.assertTrue(r.stddev()-9072.6515881128 < 2e-10);
+       Assert.assertEquals((long) numbers.length, r.observationCount());
+       Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
+       Assert.assertTrue(r.stddev() - 9072.6515881128 < 2e-10);
    }

    @Test
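The expected constants follow from the eight inputs: the mean is exact, and the expected standard deviation is consistent with the sample (n - 1) form:

    \bar{x} = \frac{1+2+4+5+3+128+25678-24}{8} = \frac{25797}{8} = 3224.625,
    \qquad
    s = \sqrt{\frac{1}{n-1}\sum_{i=1}^{n}\left(x_i-\bar{x}\right)^2} \approx 9072.6515881128 .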
@@ -174,4 +174,56 @@ public class MathUtilsUnitTest extends BaseTest {
        Assert.assertEquals(MathUtils.log10Factorial(12342), 45138.26, 1e-1);
    }

+   @Test(enabled = true)
+   public void testRandomSubset() {
+       Integer[] x = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+       Assert.assertEquals(MathUtils.randomSubset(x, 0).length, 0);
+       Assert.assertEquals(MathUtils.randomSubset(x, 1).length, 1);
+       Assert.assertEquals(MathUtils.randomSubset(x, 2).length, 2);
+       Assert.assertEquals(MathUtils.randomSubset(x, 3).length, 3);
+       Assert.assertEquals(MathUtils.randomSubset(x, 4).length, 4);
+       Assert.assertEquals(MathUtils.randomSubset(x, 5).length, 5);
+       Assert.assertEquals(MathUtils.randomSubset(x, 6).length, 6);
+       Assert.assertEquals(MathUtils.randomSubset(x, 7).length, 7);
+       Assert.assertEquals(MathUtils.randomSubset(x, 8).length, 8);
+       Assert.assertEquals(MathUtils.randomSubset(x, 9).length, 9);
+       Assert.assertEquals(MathUtils.randomSubset(x, 10).length, 10);
+       Assert.assertEquals(MathUtils.randomSubset(x, 11).length, 10);
+
+       for (int i = 0; i < 25; i++)
+           Assert.assertTrue(hasUniqueElements(MathUtils.randomSubset(x, 5)));
+   }
+
+   @Test(enabled = true)
+   public void testArrayShuffle() {
+       Integer[] x = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+       for (int i = 0; i < 25; i++) {
+           Object[] t = MathUtils.arrayShuffle(x);
+           Assert.assertTrue(hasUniqueElements(t));
+           Assert.assertTrue(hasAllElements(x, t));
+       }
+   }
+
+   private boolean hasUniqueElements(Object[] x) {
+       for (int i = 0; i < x.length; i++)
+           for (int j = i + 1; j < x.length; j++)
+               if (x[i].equals(x[j]) || x[i] == x[j])
+                   return false;
+       return true;
+   }
+
+   private boolean hasAllElements(final Object[] expected, final Object[] actual) {
+       HashSet<Object> set = new HashSet<Object>();
+       set.addAll(Arrays.asList(expected));
+       set.removeAll(Arrays.asList(actual));
+       return set.isEmpty();
+   }
+
+   private void p(Object[] x) {
+       for (Object v : x)
+           System.out.print((Integer) v + " ");
+       System.out.println();
+   }
}
@@ -112,7 +112,8 @@ public class ReadClipperTestUtils {
            }
        }

-       if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
+       // if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
+       if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION)
            return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
    }

@@ -190,4 +191,18 @@ public class ReadClipperTestUtils {
        return invertedCigar;
    }

+   /**
+    * Checks whether or not the read has any cigar element that is not H or S
+    *
+    * @param read
+    * @return true if it has any M, I or D, false otherwise
+    */
+   public static boolean readHasNonClippedBases(GATKSAMRecord read) {
+       for (CigarElement cigarElement : read.getCigar().getCigarElements())
+           if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
+               return true;
+       return false;
+   }

}
@@ -30,12 +30,12 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

+import java.util.HashMap;
import java.util.List;

/**

@@ -59,10 +59,11 @@ public class ReadClipperUnitTest extends BaseTest {
            int alnStart = read.getAlignmentStart();
            int alnEnd = read.getAlignmentEnd();
            int readLength = alnStart - alnEnd;
-           for (int i=0; i<readLength/2; i++) {
+           for (int i = 0; i < readLength / 2; i++) {
                GATKSAMRecord clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + i, alnEnd - i);
                Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
                Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
+               assertUnclippedLimits(read, clippedRead);
            }
        }
    }

@@ -72,12 +73,14 @@ public class ReadClipperUnitTest extends BaseTest {
        for (Cigar cigar : cigarList) {
            GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
            int readLength = read.getReadLength();
-           for (int i=0; i<readLength; i++) {
+           for (int i = 0; i < readLength; i++) {
                GATKSAMRecord clipLeft = ReadClipper.hardClipByReadCoordinates(read, 0, i);
                Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
+               assertUnclippedLimits(read, clipLeft);

-               GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength-1);
+               GATKSAMRecord clipRight = ReadClipper.hardClipByReadCoordinates(read, i, readLength - 1);
                Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
+               assertUnclippedLimits(read, clipRight);
            }
        }
    }
@@ -86,19 +89,27 @@ public class ReadClipperUnitTest extends BaseTest {
    public void testHardClipByReferenceCoordinates() {
        for (Cigar cigar : cigarList) {
            GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
-           int alnStart = read.getAlignmentStart();
-           int alnEnd = read.getAlignmentEnd();
-           for (int i=alnStart; i<=alnEnd; i++) {
-               if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
-                   GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
-                   if (!clipLeft.isEmpty())
-                       Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less than expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
+           int start = read.getSoftStart();
+           int stop = read.getSoftEnd();
+
+           // System.out.println(String.format("CIGAR: %s (%d, %d)", cigar.toString(), start, stop));
+
+           // if (ReadUtils.readIsEntirelyInsertion(read))
+           //     System.out.println("debug");
+
+           for (int i = start; i <= stop; i++) {
+               GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
+               if (!clipLeft.isEmpty()) {
+                   // System.out.println(String.format("\t left [%d] %s -> %s ", i-start+1, cigar.toString(), clipLeft.getCigarString()));
+                   Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less than expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
+                   assertUnclippedLimits(read, clipLeft);
                }

-               if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
-                   GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
-                   if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
-                       Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
+               GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
+               if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
+                   // System.out.println(String.format("\t right [%d] %s -> %s ", i-start+1, cigar.toString(), clipRight.getCigarString()));
+                   Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
+                   assertUnclippedLimits(read, clipRight);
                }
            }
        }
    }

@@ -111,10 +122,14 @@ public class ReadClipperUnitTest extends BaseTest {
            int alnStart = read.getAlignmentStart();
            int alnEnd = read.getAlignmentEnd();
            if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
-               for (int i=alnStart; i<=alnEnd; i++) {
+               for (int i = alnStart; i <= alnEnd; i++) {
                    GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i);
-                   if (!clipLeft.isEmpty())
+
+                   if (!clipLeft.isEmpty()) {
+                       // System.out.println(String.format("Left Tail [%d]: %s (%d,%d,%d : %d,%d,%d) -> %s (%d,%d,%d : %d,%d,%d)", i, cigar.toString(), read.getUnclippedStart(), read.getSoftStart(), read.getAlignmentStart(), read.getAlignmentEnd(), read.getSoftEnd(), read.getUnclippedEnd(), clipLeft.getCigarString(), clipLeft.getUnclippedStart(), clipLeft.getSoftStart(), clipLeft.getAlignmentStart(), clipLeft.getAlignmentEnd(), clipLeft.getSoftEnd(), clipLeft.getUnclippedEnd()));
                        Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less than expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
+                       assertUnclippedLimits(read, clipLeft);
+                   }
                }
            }
        }
    }

@@ -127,10 +142,12 @@ public class ReadClipperUnitTest extends BaseTest {
            int alnStart = read.getAlignmentStart();
            int alnEnd = read.getAlignmentEnd();
            if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
-               for (int i=alnStart; i<=alnEnd; i++) {
+               for (int i = alnStart; i <= alnEnd; i++) {
                    GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i);
-                   if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
+                   if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
                        Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
+                       assertUnclippedLimits(read, clipRight);
+                   }
                }
            }
        }
    }
@ -145,43 +162,36 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
for (Cigar cigar : cigarList) {
|
for (Cigar cigar : cigarList) {
|
||||||
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
|
||||||
int readLength = read.getReadLength();
|
int readLength = read.getReadLength();
|
||||||
byte [] quals = new byte[readLength];
|
byte[] quals = new byte[readLength];
|
||||||
|
|
||||||
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
||||||
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the left tail
|
||||||
// create a read with nLowQualBases in the left tail
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
||||||
quals[addLeft] = LOW_QUAL;
|
quals[addLeft] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipLeft = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
assertUnclippedLimits(read, clipLeft); // Make sure limits haven't changed
|
||||||
|
assertNoLowQualBases(clipLeft, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||||
|
|
||||||
// Make sure the low qualities are gone
|
|
||||||
assertNoLowQualBases(clipLeft, LOW_QUAL);
|
|
||||||
|
|
||||||
// Can't run this test with the current contract of no hanging insertions
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases in the right tail
|
||||||
// Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
|
||||||
|
|
||||||
// create a read with nLowQualBases in the right tail
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
||||||
quals[readLength - addRight - 1] = LOW_QUAL;
|
quals[readLength - addRight - 1] = LOW_QUAL;
|
||||||
read.setBaseQualities(quals);
|
read.setBaseQualities(quals);
|
||||||
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
GATKSAMRecord clipRight = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);
|
||||||
|
|
||||||
// Tests
|
// System.out.println(String.format("Debug [%d]: %s -> %s / %s", nLowQualBases, cigar.toString(), clipLeft.getCigarString(), clipRight.getCigarString()));
|
||||||
|
|
||||||
// Make sure the low qualities are gone
|
assertUnclippedLimits(read, clipRight); // Make sure limits haven't changed
|
||||||
assertNoLowQualBases(clipRight, LOW_QUAL);
|
assertNoLowQualBases(clipRight, LOW_QUAL); // Make sure the low qualities are gone
|
||||||
|
Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, // Make sure only low quality bases were clipped
|
||||||
|
String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
||||||
|
|
||||||
// Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
|
if (nLowQualBases <= readLength / 2) {
|
||||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
Utils.fillArrayWithByte(quals, HIGH_QUAL); // create a read with nLowQualBases on both tails
|
||||||
|
|
||||||
// create a read with nLowQualBases in the both tails
|
|
||||||
if (nLowQualBases <= readLength/2) {
|
|
||||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
|
||||||
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
||||||
quals[addBoth] = LOW_QUAL;
|
quals[addBoth] = LOW_QUAL;
|
||||||
quals[readLength - addBoth - 1] = LOW_QUAL;
|
quals[readLength - addBoth - 1] = LOW_QUAL;
|
||||||
@@ -189,83 +199,25 @@ public class ReadClipperUnitTest extends BaseTest {
                     read.setBaseQualities(quals);
                     GATKSAMRecord clipBoth = ReadClipper.hardClipLowQualEnds(read, LOW_QUAL);

-                    // Tests
-
-                    // Make sure the low qualities are gone
-                    assertNoLowQualBases(clipBoth, LOW_QUAL);
-
-                    // Can't run this test with the current contract of no hanging insertions
-                    //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
+                    assertUnclippedLimits(read, clipBoth);                                   // Make sure limits haven't changed
+                    assertNoLowQualBases(clipBoth, LOW_QUAL);                                // Make sure the low qualities are gone
+                    Assert.assertEquals(clipBoth.getReadLength(), readLength - (2 * nLowQualBases),  // Make sure only low quality bases were clipped
+                            String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipBoth.getReadLength(), readLength - (2 * nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
                 }
             }
-
-            // logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
         }

-        // ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug )
-        final byte[] BASES = {'A','C','G','T','A','C','G','T'};
-        final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
-        final String CIGAR = "1S1M5I1S";
-
-        final byte[] CLIPPED_BASES = {};
-        final byte[] CLIPPED_QUALS = {};
-        final String CLIPPED_CIGAR = "";
-
-        GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
-        GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
-
-        ReadClipperTestUtils.assertEqualReads(ReadClipper.hardClipLowQualEnds(read, (byte) 2), expected);
    }
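The one-off block removed above is still the clearest illustration of the "no hanging insertions" contract: the low-qual clip on the left ends inside the 5I insertion, the only aligned base (the 1M) is gone, and the leftover inserted bases have nothing to anchor to, so they are clipped too and the read comes back empty. A sketch of that same scenario, illustrative only and using only calls that appear in this file:

    GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(
            new byte[]{'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'},
            new byte[]{2, 2, 2, 2, 20, 20, 20, 2},
            "1S1M5I1S");
    // the q2 prefix covers the 1S, the 1M and two bases of the 5I; the q2 suffix covers the trailing 1S
    GATKSAMRecord clipped = ReadClipper.hardClipLowQualEnds(read, (byte) 2);
    Assert.assertTrue(clipped.isEmpty());   // the surviving insertion bases hang with no aligned anchor and are removed as well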

     @Test(enabled = true)
     public void testHardClipSoftClippedBases() {

-        // Generate a list of cigars to test
         for (Cigar cigar : cigarList) {
             GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
             GATKSAMRecord clippedRead = ReadClipper.hardClipSoftClippedBases(read);
+            CigarCounter original = new CigarCounter(read);
+            CigarCounter clipped = new CigarCounter(clippedRead);

-            int sumHardClips = 0;
-            int sumMatches = 0;
-
-            boolean tail = true;
-            for (CigarElement element : read.getCigar().getCigarElements()) {
-                // Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
-                if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
-                    tail = true;
-
-                // Adds all H, S and D's (next to hard/soft clips).
-                // All these should be hard clips after clipping.
-                if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
-                    sumHardClips += element.getLength();
-
-                // this means we're no longer on the tail (insertions can still potentially be the tail because
-                // of the current contract of clipping out hanging insertions)
-                else if (element.getOperator() != CigarOperator.INSERTION)
-                    tail = false;
-
-                // Adds all matches to verify that they remain the same after clipping
-                if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
-                    sumMatches += element.getLength();
-            }
-
-            for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
-                // Test if clipped read has Soft Clips (shouldn't have any!)
-                Assert.assertTrue(element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));
-
-                // Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
-                if (element.getOperator() == CigarOperator.HARD_CLIP)
-                    sumHardClips -= element.getLength();
-
-                // Make sure all matches are still there
-                if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
-                    sumMatches -= element.getLength();
-            }
-
-            Assert.assertTrue(sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
-            Assert.assertTrue(sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
-
-            // logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
+            assertUnclippedLimits(read, clippedRead);                                        // Make sure limits haven't changed
+            original.assertHardClippingSoftClips(clipped);                                   // Make sure we have only clipped SOFT_CLIPS
         }
     }

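The CigarCounter comparison (the helper class is added at the bottom of this file) encodes the same invariant the deleted hand-rolled loops checked: every soft-clipped base must be accounted for as a hard clip, and every other cigar bucket must be untouched. A hypothetical instance of the expected behavior; the cigar value is made up and TextCigarCodec is the samtools text cigar parser:

    GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(
            TextCigarCodec.getSingleton().decode("2S6M1S"));
    GATKSAMRecord clipped = ReadClipper.hardClipSoftClippedBases(read);
    // expected result, assuming soft-clipped bases are recorded as hard clips:
    Assert.assertEquals(clipped.getCigarString(), "2H6M1H");   // soft clips become hard clips; the 6M is untouched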
@@ -276,38 +228,39 @@ public class ReadClipperUnitTest extends BaseTest {
                 GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
                 GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read);

+                assertUnclippedLimits(read, clippedRead);                                    // Make sure limits haven't changed

                 int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION);
                 if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
                     expectedLength -= leadingCigarElementLength(ReadClipperTestUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION);

-                if (! clippedRead.isEmpty()) {
+                if (!clippedRead.isEmpty()) {
                     Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString()));  // check that everything else is still there
                     Assert.assertFalse(startsWithInsertion(clippedRead.getCigar()));         // check that the insertions are gone
-                }
-                else
+                } else
                     Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength));  // check that the read was expected to be fully clipped
             }
         }
     }
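As the expectedLength arithmetic above spells out, hardClipLeadingInsertions is expected to drop a leading insertion outright, and a trailing one as well whenever the cigar contains anything besides insertions and hard clips. A hypothetical case, illustrative only:

    GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(
            TextCigarCodec.getSingleton().decode("2I6M"));
    GATKSAMRecord clipped = ReadClipper.hardClipLeadingInsertions(read);
    Assert.assertEquals(clipped.getReadLength(), 6);              // the two leading inserted bases are gone
    Assert.assertFalse(startsWithInsertion(clipped.getCigar()));  // and the result no longer starts with an I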

     @Test(enabled = true)
-    public void testRevertSoftClippedBases()
-    {
-        for (Cigar cigar: cigarList) {
+    public void testRevertSoftClippedBases() {
+        for (Cigar cigar : cigarList) {
             final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
             final int tailSoftClips = leadingCigarElementLength(ReadClipperTestUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);

             final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
             final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);

-            if ( leadingSoftClips > 0 || tailSoftClips > 0) {
+            assertUnclippedLimits(read, unclipped);                                          // Make sure limits haven't changed
+
+            if (leadingSoftClips > 0 || tailSoftClips > 0) {
                 final int expectedStart = read.getAlignmentStart() - leadingSoftClips;
                 final int expectedEnd = read.getAlignmentEnd() + tailSoftClips;

                 Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart);
                 Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd);
-            }
-            else
+            } else
                 Assert.assertEquals(read.getCigarString(), unclipped.getCigarString());
         }
     }
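revertSoftClippedBases goes the other way: soft-clipped bases rejoin the alignment, so the alignment start and end move outward by the clip lengths while the unclipped limits stay fixed, which is exactly what the assertions above verify. A hypothetical case, illustrative only:

    GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(
            TextCigarCodec.getSingleton().decode("3S5M"));
    GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read);
    Assert.assertEquals(unclipped.getAlignmentStart(), read.getAlignmentStart() - 3);  // start moves left over the old 3S
    Assert.assertEquals(unclipped.getCigarString(), "8M");                             // the soft clip is folded into the match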
@@ -315,12 +268,25 @@ public class ReadClipperUnitTest extends BaseTest {

     private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
         if (!read.isEmpty()) {
-            byte [] quals = read.getBaseQualities();
-            for (int i=0; i<quals.length; i++)
+            byte[] quals = read.getBaseQualities();
+            for (int i = 0; i < quals.length; i++)
                 Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
         }
     }

+    /**
+     * Asserts that clipping doesn't change the getUnclippedStart / getUnclippedEnd
+     *
+     * @param original the read before clipping
+     * @param clipped  the read after clipping
+     */
+    private void assertUnclippedLimits(GATKSAMRecord original, GATKSAMRecord clipped) {
+        if (ReadClipperTestUtils.readHasNonClippedBases(clipped)) {
+            Assert.assertEquals(original.getUnclippedStart(), clipped.getUnclippedStart());
+            Assert.assertEquals(original.getUnclippedEnd(), clipped.getUnclippedEnd());
+        }
+    }
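The invariant behind assertUnclippedLimits: getUnclippedStart()/getUnclippedEnd() project the alignment out over any clipped bases, so converting soft clips to hard clips must not move them. A hypothetical check, illustrative only:

    // For a 2S6M read, the unclipped start is alignmentStart - 2 both before and
    // after hardClipSoftClippedBases, since hard clips still count toward the
    // unclipped coordinates.
    GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(
            TextCigarCodec.getSingleton().decode("2S6M"));
    GATKSAMRecord clipped = ReadClipper.hardClipSoftClippedBases(read);
    Assert.assertEquals(clipped.getUnclippedStart(), read.getUnclippedStart());
    Assert.assertEquals(clipped.getUnclippedEnd(), read.getUnclippedEnd());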

     private boolean startsWithInsertion(Cigar cigar) {
         return leadingCigarElementLength(cigar, CigarOperator.INSERTION) > 0;
     }

@@ -335,10 +301,46 @@ public class ReadClipperUnitTest extends BaseTest {
         return 0;
     }

-    private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) {
+    private boolean cigarHasElementsDifferentThanInsertionsAndHardClips(Cigar cigar) {
         for (CigarElement cigarElement : cigar.getCigarElements())
             if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
                 return true;
         return false;
     }

+    private class CigarCounter {
+        private HashMap<CigarOperator, Integer> counter;
+
+        public Integer getCounterForOp(CigarOperator operator) {
+            return counter.get(operator);
+        }
+
+        public CigarCounter(GATKSAMRecord read) {
+            CigarOperator[] operators = CigarOperator.values();
+            counter = new HashMap<CigarOperator, Integer>(operators.length);
+
+            for (CigarOperator op : operators)
+                counter.put(op, 0);
+
+            for (CigarElement cigarElement : read.getCigar().getCigarElements())
+                counter.put(cigarElement.getOperator(), counter.get(cigarElement.getOperator()) + cigarElement.getLength());
+        }
+
+        public boolean assertHardClippingSoftClips(CigarCounter clipped) {
+            for (CigarOperator op : counter.keySet()) {
+                if (op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP) {
+                    int counterTotal = counter.get(CigarOperator.HARD_CLIP) + counter.get(CigarOperator.SOFT_CLIP);
+                    int clippedHard = clipped.getCounterForOp(CigarOperator.HARD_CLIP);
+                    int clippedSoft = clipped.getCounterForOp(CigarOperator.SOFT_CLIP);
+
+                    Assert.assertEquals(counterTotal, clippedHard);
+                    Assert.assertTrue(clippedSoft == 0);
+                } else
+                    Assert.assertEquals(counter.get(op), clipped.getCounterForOp(op));
+            }
+            return true;
+        }
+    }

 }
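Taken together, CigarCounter is a small multiset over cigar operators, and assertHardClippingSoftClips asserts that the clipped read's hard-clip bucket equals the original's hard-plus-soft total, that its soft-clip bucket is empty, and that every other bucket is unchanged. A hypothetical usage, illustrative only:

    // 1H2S5M -> expected 3H5M: hard (1) + soft (2) collapse into hard (3),
    // matches are untouched, and no soft clips survive.
    GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(
            TextCigarCodec.getSingleton().decode("1H2S5M"));
    GATKSAMRecord clipped = ReadClipper.hardClipSoftClippedBases(read);
    new CigarCounter(read).assertHardClippingSoftClips(new CigarCounter(clipped));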
Binary file not shown.
@@ -1,3 +1,3 @@
 <ivy-module version="1.0">
-    <info organisation="net.sf.snpeff" module="snpeff" revision="2.0.4rc3" status="release" />
+    <info organisation="net.sf.snpeff" module="snpeff" revision="2.0.5" status="release" />
 </ivy-module>