Added code to retrieve dangling heads from the read threading graph (previously we were rescuing just the tails).
The purpose of this is to be able to call SNPs that fall at the beginning of a capture region (or exon). Before, the read threading code would only start threading from the first kmer that matched the reference. But that means that, in the case of a SNP at the beginning of an exon, it wouldn't start threading the read until after the SNP position - so we'd lose the SNP. For now, this is still very experimental. It works well for RNAseq data, but does introduce FPs in normal exomes. I know why this is and how to fix it, but it requires a much larger fix to the HC: the HC needs to pass all reads and bases to the annotation engine (like UG does) instead of just the high quality ones. So for now, the head merging is disabled by default. As per reviewer comments, I moved the head and tail merging code out into their own class.
This commit is contained in:
parent
cecdd2f2c5
commit
fa65716fe9
|
|
@ -278,6 +278,13 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false)
|
||||
protected int numPruningSamples = 1;
|
||||
|
||||
/**
 * If enabled, the read threading assembler will attempt to merge dangling heads (chains that do not
 * begin on the reference path) back into the graph, so that variants at the very start of a capture
 * region or exon are not lost.
 * This mode is currently experimental and should only be used in the RNA-seq calling pipeline.
 */
@Advanced
@Argument(fullName="recoverDanglingHeads", shortName="recoverDanglingHeads", doc="Should we enable dangling head recovery in the read threading assembler?", required = false)
protected boolean recoverDanglingHeads = false;
|
||||
|
||||
// Hidden escape hatch: dangling tail recovery is on by default; this flag disables it.
@Hidden
@Argument(fullName="dontRecoverDanglingTails", shortName="dontRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false)
protected boolean dontRecoverDanglingTails = false;
|
||||
|
|
@ -634,6 +641,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
|
||||
assemblyEngine.setAllowCyclesInKmerGraphToGeneratePaths(allowCyclesInKmerGraphToGeneratePaths);
|
||||
assemblyEngine.setRecoverDanglingTails(!dontRecoverDanglingTails);
|
||||
assemblyEngine.setRecoverDanglingHeads(recoverDanglingHeads);
|
||||
assemblyEngine.setMinBaseQualityToUseInAssembly(MIN_BASE_QUALTY_SCORE);
|
||||
|
||||
MIN_TAIL_QUALITY = (byte)(MIN_BASE_QUALTY_SCORE - 1);
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ public abstract class LocalAssemblyEngine {
|
|||
protected boolean allowCyclesInKmerGraphToGeneratePaths = false;
|
||||
protected boolean debugGraphTransformations = false;
|
||||
protected boolean recoverDanglingTails = true;
|
||||
protected boolean recoverDanglingHeads = true;
|
||||
|
||||
protected byte minBaseQualityToUseInAssembly = DEFAULT_MIN_BASE_QUALITY_TO_USE;
|
||||
protected int pruneFactor = 2;
|
||||
|
|
@ -456,4 +457,12 @@ public abstract class LocalAssemblyEngine {
|
|||
/** Sets whether dangling tail recovery should be performed during assembly. */
public void setRecoverDanglingTails(boolean recoverDanglingTails) {
    this.recoverDanglingTails = recoverDanglingTails;
}
|
||||
|
||||
/** @return true if dangling head recovery is enabled for this assembly engine */
public boolean isRecoverDanglingHeads() {
    return recoverDanglingHeads;
}
|
||||
|
||||
/** Sets whether dangling head recovery should be performed during assembly. */
public void setRecoverDanglingHeads(boolean recoverDanglingHeads) {
    this.recoverDanglingHeads = recoverDanglingHeads;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -646,24 +646,6 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
|
|||
'}';
|
||||
}
|
||||
|
||||
/**
|
||||
* The base sequence for the given path.
|
||||
* Note, this assumes that the path does not start with a source node.
|
||||
*
|
||||
* @param path the list of vertexes that make up the path
|
||||
* @return non-null sequence of bases corresponding to the given path
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
public byte[] getBasesForPath(final List<? extends DeBruijnVertex> path) {
|
||||
if ( path == null ) throw new IllegalArgumentException("Path cannot be null");
|
||||
|
||||
final StringBuffer sb = new StringBuffer();
|
||||
for ( final DeBruijnVertex v : path )
|
||||
sb.append((char)v.getSuffix());
|
||||
|
||||
return sb.toString().getBytes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the set of vertices within distance edges of source, regardless of edge direction
|
||||
*
|
||||
|
|
|
|||
|
|
@ -65,14 +65,6 @@ public class DeBruijnVertex extends BaseVertex {
|
|||
super(sequence);
|
||||
}
|
||||
|
||||
/**
 * For testing purposes only
 * @param sequence the kmer sequence for this vertex, as a String
 */
protected DeBruijnVertex( final String sequence ) {
    this(sequence.getBytes());
}
|
||||
|
||||
/**
|
||||
* Get the kmer size for this DeBruijnVertex
|
||||
* @return integer >= 1
|
||||
|
|
|
|||
|
|
@ -0,0 +1,520 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.smithwaterman.*;
|
||||
import org.jgrapht.EdgeFactory;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public abstract class DanglingChainMergingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSampleEdge> {
|
||||
|
||||
private static final int MAX_CIGAR_COMPLEXITY = 3;
|
||||
private static final int MIN_DANGLING_TAIL_LENGTH = 5; // SNP + 3 stabilizing nodes + the LCA
|
||||
private static final int MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE = 1;
|
||||
|
||||
protected boolean alreadyBuilt;
|
||||
|
||||
/**
 * Create a new DanglingChainMergingGraph using kmerSize for matching
 * @param kmerSize must be >= 1
 * @param edgeFactory factory used to create edges for this graph
 */
protected DanglingChainMergingGraph(final int kmerSize, final EdgeFactory<MultiDeBruijnVertex, MultiSampleEdge> edgeFactory) {
    super(kmerSize, edgeFactory);
}
|
||||
|
||||
/**
 * Edge factory that encapsulates the numPruningSamples assembly parameter
 */
protected static class MyEdgeFactory implements EdgeFactory<MultiDeBruijnVertex, MultiSampleEdge> {
    // number of samples that must support a chain for it to survive pruning; baked into every edge created here
    final int numPruningSamples;

    public MyEdgeFactory(int numPruningSamples) {
        this.numPruningSamples = numPruningSamples;
    }

    // invoked by the graph machinery when threading reads; new edges start as non-reference with multiplicity 1
    @Override
    public MultiSampleEdge createEdge(final MultiDeBruijnVertex sourceVertex, final MultiDeBruijnVertex targetVertex) {
        return new MultiSampleEdge(false, 1, numPruningSamples);
    }

    // used directly by the dangling chain merging code to create edges with an explicit ref status and multiplicity
    public MultiSampleEdge createEdge(final boolean isRef, final int multiplicity) {
        return new MultiSampleEdge(isRef, multiplicity, numPruningSamples);
    }

}
|
||||
|
||||
/**
 * Class to keep track of the important dangling chain merging data
 */
protected final class DanglingChainMergeHelper {
    // the dangling (non-reference) chain and the reference chain it may be merged into
    final List<MultiDeBruijnVertex> danglingPath, referencePath;
    // base sequences corresponding to the two paths above
    final byte[] danglingPathString, referencePathString;
    // the Smith-Waterman alignment of the dangling path against the reference path
    final Cigar cigar;

    public DanglingChainMergeHelper(final List<MultiDeBruijnVertex> danglingPath,
                                    final List<MultiDeBruijnVertex> referencePath,
                                    final byte[] danglingPathString,
                                    final byte[] referencePathString,
                                    final Cigar cigar) {
        this.danglingPath = danglingPath;
        this.referencePath = referencePath;
        this.danglingPathString = danglingPathString;
        this.referencePathString = referencePathString;
        this.cigar = cigar;
    }
}
|
||||
|
||||
/**
|
||||
* Try to recover dangling tails
|
||||
*
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
*/
|
||||
public void recoverDanglingTails(final int pruneFactor) {
|
||||
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built");
|
||||
|
||||
int attempted = 0;
|
||||
int nRecovered = 0;
|
||||
for ( final MultiDeBruijnVertex v : vertexSet() ) {
|
||||
if ( outDegreeOf(v) == 0 && ! isRefSink(v) ) {
|
||||
attempted++;
|
||||
nRecovered += recoverDanglingTail(v, pruneFactor);
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug("Recovered " + nRecovered + " of " + attempted + " dangling tails");
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to recover dangling heads
|
||||
*
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
*/
|
||||
public void recoverDanglingHeads(final int pruneFactor) {
|
||||
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingHeads requires the graph be already built");
|
||||
|
||||
// we need to build a list of dangling heads because that process can modify the graph (and otherwise generate
|
||||
// a ConcurrentModificationException if we do it while iterating over the vertexes)
|
||||
final List<MultiDeBruijnVertex> danglingHeads = new ArrayList<>();
|
||||
|
||||
int attempted = 0;
|
||||
int nRecovered = 0;
|
||||
for ( final MultiDeBruijnVertex v : vertexSet() ) {
|
||||
if ( inDegreeOf(v) == 0 && ! isRefSource(v) )
|
||||
danglingHeads.add(v);
|
||||
}
|
||||
|
||||
// now we can try to recover the dangling heads
|
||||
for ( final MultiDeBruijnVertex v : danglingHeads ) {
|
||||
attempted++;
|
||||
nRecovered += recoverDanglingHead(v, pruneFactor);
|
||||
}
|
||||
|
||||
logger.debug("Recovered " + nRecovered + " of " + attempted + " dangling heads");
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to attach vertex with out-degree == 0 to the graph
|
||||
*
|
||||
* @param vertex the vertex to recover
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @return 1 if we successfully recovered the vertex and 0 otherwise
|
||||
*/
|
||||
protected int recoverDanglingTail(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
if ( outDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling tail for " + vertex + " but it has out-degree > 0");
|
||||
|
||||
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
|
||||
final DanglingChainMergeHelper danglingTailMergeResult = generateCigarAgainstDownwardsReferencePath(vertex, pruneFactor);
|
||||
|
||||
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
|
||||
if ( danglingTailMergeResult == null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar, false, true) )
|
||||
return 0;
|
||||
|
||||
// merge
|
||||
return mergeDanglingTail(danglingTailMergeResult);
|
||||
}
|
||||
|
||||
/**
 * Attempt to attach vertex with in-degree == 0, or a vertex on its path, to the graph
 *
 * @param vertex the vertex to recover
 * @param pruneFactor the prune factor to use in ignoring chain pieces
 * @return 1 if we successfully recovered a vertex and 0 otherwise
 */
protected int recoverDanglingHead(final MultiDeBruijnVertex vertex, final int pruneFactor) {
    if ( inDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling head for " + vertex + " but it has in-degree > 0");

    // generate the CIGAR string from Smith-Waterman between the dangling head and reference paths
    final DanglingChainMergeHelper danglingHeadMergeResult = generateCigarAgainstUpwardsReferencePath(vertex, pruneFactor);

    // if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
    if ( danglingHeadMergeResult == null || ! cigarIsOkayToMerge(danglingHeadMergeResult.cigar, true, false) )
        return 0;

    // merge
    return mergeDanglingHead(danglingHeadMergeResult);
}
|
||||
|
||||
/**
 * Determine whether the provided cigar is okay to merge into the reference path
 *
 * @param cigar the cigar to analyze
 * @param requireFirstElementM if true, require that the first cigar element be an M operator in order for it to be okay
 * @param requireLastElementM if true, require that the last cigar element be an M operator in order for it to be okay
 * @return true if it's okay to merge, false otherwise
 */
protected boolean cigarIsOkayToMerge(final Cigar cigar, final boolean requireFirstElementM, final boolean requireLastElementM) {

    final List<CigarElement> elements = cigar.getCigarElements();
    final int numElements = elements.size();

    // don't allow more than a couple of different ops
    if ( numElements == 0 || numElements > MAX_CIGAR_COMPLEXITY )
        return false;

    // the first element must be an M (required when merging dangling heads)
    if ( requireFirstElementM && elements.get(0).getOperator() != CigarOperator.M )
        return false;

    // the last element must be an M (required when merging dangling tails)
    if ( requireLastElementM && elements.get(numElements - 1).getOperator() != CigarOperator.M )
        return false;

    // TODO -- do we want to check whether the Ms mismatch too much also?

    return true;
}
|
||||
|
||||
/**
 * Actually merge the dangling tail if possible
 *
 * @param danglingTailMergeResult the result from generating a Cigar for the dangling tail against the reference
 * @return 1 if merge was successful, 0 otherwise
 * @throws IllegalArgumentException if the last Cigar element is not an M (callers should have checked via cigarIsOkayToMerge)
 */
protected int mergeDanglingTail(final DanglingChainMergeHelper danglingTailMergeResult) {

    final List<CigarElement> elements = danglingTailMergeResult.cigar.getCigarElements();
    final CigarElement lastElement = elements.get(elements.size() - 1);
    if ( lastElement.getOperator() != CigarOperator.M )
        throw new IllegalArgumentException("The last Cigar element must be an M");

    // how many bases at the end of the dangling path exactly match the end of the reference path,
    // capped by the length of the final M element
    final int lastRefIndex = danglingTailMergeResult.cigar.getReferenceLength() - 1;
    final int matchingSuffix = Math.min(GraphUtils.longestSuffixMatch(danglingTailMergeResult.referencePathString, danglingTailMergeResult.danglingPathString, lastRefIndex), lastElement.getLength());
    if ( matchingSuffix == 0 )
        return 0;

    // index into the dangling path of the vertex from which we will draw the new edge
    final int altIndexToMerge = Math.max(danglingTailMergeResult.cigar.getReadLength() - matchingSuffix - 1, 0);

    // there is an important edge condition that we need to handle here: Smith-Waterman correctly calculates that there is a
    // deletion, that deletion is left-aligned such that the LCA node is part of that deletion, and the rest of the dangling
    // tail is a perfect match to the suffix of the reference path. In this case we need to push the reference index to merge
    // down one position so that we don't incorrectly cut a base off of the deletion.
    final boolean firstElementIsDeletion = elements.get(0).getOperator() == CigarOperator.D;
    final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
    final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);

    // connect the dangling tail back into the reference path with a new non-reference edge
    addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));

    return 1;
}
|
||||
|
||||
/**
 * Actually merge the dangling head if possible
 *
 * @param danglingHeadMergeResult the result from generating a Cigar for the dangling head against the reference
 * @return 1 if merge was successful, 0 otherwise
 * @throws IllegalArgumentException if the first Cigar element is not an M (callers should have checked via cigarIsOkayToMerge)
 */
protected int mergeDanglingHead(final DanglingChainMergeHelper danglingHeadMergeResult) {

    final List<CigarElement> elements = danglingHeadMergeResult.cigar.getCigarElements();
    final CigarElement firstElement = elements.get(0);
    if ( firstElement.getOperator() != CigarOperator.M )
        throw new IllegalArgumentException("The first Cigar element must be an M");

    // number of leading positions on which the dangling and reference paths agree well enough to merge
    // (bestPrefixMatch is defined elsewhere in this class; presumably it tolerates up to
    // MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE mismatches -- TODO confirm)
    final int indexesToMerge = bestPrefixMatch(danglingHeadMergeResult.referencePathString, danglingHeadMergeResult.danglingPathString, firstElement.getLength());
    if ( indexesToMerge <= 0 )
        return 0;

    // we can't push back the reference path
    if ( indexesToMerge >= danglingHeadMergeResult.referencePath.size() - 1 )
        return 0;

    // but we can manipulate the dangling path if we need to
    if ( indexesToMerge >= danglingHeadMergeResult.danglingPath.size() &&
            ! extendDanglingPathAgainstReference(danglingHeadMergeResult, indexesToMerge - danglingHeadMergeResult.danglingPath.size() + 2) )
        return 0;

    // connect the reference path into the start of the dangling head with a new non-reference edge
    addEdge(danglingHeadMergeResult.referencePath.get(indexesToMerge+1), danglingHeadMergeResult.danglingPath.get(indexesToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));

    return 1;
}
|
||||
|
||||
/**
 * Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the
 * provided vertex is the sink) and the reference path.
 *
 * @param vertex the sink of the dangling chain
 * @param pruneFactor the prune factor to use in ignoring chain pieces
 * @return a DanglingChainMergeHelper bundling the paths, their bases, and the alignment Cigar,
 *         or null if no proper alignment could be generated
 */
protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {

    // find the lowest common ancestor path between vertex and the reference sink if available;
    // dangling tails shorter than MIN_DANGLING_TAIL_LENGTH are not worth rescuing
    final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestorOfReference(vertex, pruneFactor);
    if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
        return null;

    // now get the reference path from the LCA
    final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards);

    // create the Smith-Waterman strings to use
    final byte[] refBases = getBasesForPath(refPath, false);
    final byte[] altBases = getBasesForPath(altPath, false);

    // run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting)
    final SmithWaterman alignment = new SWPairwiseAlignment(refBases, altBases, SWParameterSet.STANDARD_NGS, SWPairwiseAlignment.OVERHANG_STRATEGY.LEADING_INDEL);
    return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar()));
}
|
||||
|
||||
/**
 * Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the
 * provided vertex is the source) and the reference path.
 *
 * @param vertex the source of the dangling head
 * @param pruneFactor the prune factor to use in ignoring chain pieces
 * @return a DanglingChainMergeHelper bundling the paths, their bases, and the alignment Cigar,
 *         or null if no proper alignment could be generated
 */
protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {

    // find the highest common descendant path between vertex and the reference source if available
    // NOTE(review): unlike the downwards/tail case, there is no minimum-length check here -- confirm intentional
    final List<MultiDeBruijnVertex> altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor);
    if ( altPath == null || isRefSink(altPath.get(0)) )
        return null;

    // now get the reference path upwards from the highest common descendant
    final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards);

    // create the Smith-Waterman strings to use (reversing the source node's bases since we walked upwards)
    final byte[] refBases = getBasesForPath(refPath, true);
    final byte[] altBases = getBasesForPath(altPath, true);

    // run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting)
    final SmithWaterman alignment = new SWPairwiseAlignment(refBases, altBases, SWParameterSet.STANDARD_NGS, SWPairwiseAlignment.OVERHANG_STRATEGY.LEADING_INDEL);
    return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar()));
}
|
||||
|
||||
/**
 * Finds the path upwards in the graph from this vertex to the reference sequence, including the lowest common ancestor vertex.
 * Note that nodes are excluded if their pruning weight is less than the pruning factor.
 *
 * @param vertex the original vertex
 * @param pruneFactor the prune factor to use in ignoring chain pieces
 * @return the path if it can be determined or null if this vertex either doesn't merge onto the reference path or
 *         has an ancestor with multiple incoming edges before hitting the reference path
 */
protected List<MultiDeBruijnVertex> findPathUpwardsToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) {
    final LinkedList<MultiDeBruijnVertex> path = new LinkedList<>();

    // walk against the edge direction until we either hit the reference path or a vertex
    // with more than one incoming edge (where the single-chain assumption breaks down)
    MultiDeBruijnVertex v = vertex;
    while ( ! isReferenceNode(v) && inDegreeOf(v) == 1 ) {
        final MultiSampleEdge edge = incomingEdgeOf(v);
        // if it has too low a weight, don't use it (or previous vertexes) for the path
        if ( edge.getPruningMultiplicity() < pruneFactor )
            path.clear();   // discard everything accumulated below this under-supported edge
        // otherwise it is safe to use
        else
            path.addFirst(v);
        v = getEdgeSource(edge);
    }
    // include the stopping vertex itself (the LCA when the walk reached the reference)
    path.addFirst(v);

    return isReferenceNode(v) ? path : null;
}
|
||||
|
||||
/**
 * Finds the path downwards in the graph from this vertex to the reference sequence, including the highest common descendant vertex.
 * However note that the path is reversed so that this vertex ends up at the end of the path.
 * Also note that nodes are excluded if their pruning weight is less than the pruning factor.
 *
 * @param vertex the original vertex
 * @param pruneFactor the prune factor to use in ignoring chain pieces
 * @return the path if it can be determined or null if this vertex either doesn't merge onto the reference path or
 *         has a descendant with multiple outgoing edges before hitting the reference path
 */
protected List<MultiDeBruijnVertex> findPathDownwardsToHighestCommonDescendantOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) {
    final LinkedList<MultiDeBruijnVertex> path = new LinkedList<>();

    // walk with the edge direction until we either hit the reference path or a vertex
    // with more than one outgoing edge (where the single-chain assumption breaks down)
    MultiDeBruijnVertex v = vertex;
    while ( ! isReferenceNode(v) && outDegreeOf(v) == 1 ) {
        final MultiSampleEdge edge = outgoingEdgeOf(v);
        // if it has too low a weight, don't use it (or previous vertexes) for the path
        if ( edge.getPruningMultiplicity() < pruneFactor )
            path.clear();   // discard everything accumulated before this under-supported edge
        // otherwise it is safe to use
        else
            path.addFirst(v);
        v = getEdgeTarget(edge);
    }
    // addFirst here is what reverses the path: the stopping vertex becomes the head
    // and the original vertex ends up at the tail
    path.addFirst(v);

    return isReferenceNode(v) ? path : null;
}
|
||||
|
||||
/** Direction in which to walk the reference path through the graph. */
private enum TraversalDirection {
    downwards,   // follow the reference path toward the reference sink
    upwards      // follow the reference path toward the reference source
}
|
||||
|
||||
/**
|
||||
* Finds the path in the graph from this vertex to the reference sink, including this vertex
|
||||
*
|
||||
* @param start the reference vertex to start from
|
||||
* @param direction describes which direction to move in the graph (i.e. down to the reference sink or up to the source)
|
||||
* @return the path (non-null, non-empty)
|
||||
*/
|
||||
protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start, final TraversalDirection direction) {
|
||||
if ( ! isReferenceNode(start) ) throw new IllegalArgumentException("Cannot construct the reference path from a vertex that is not on that path");
|
||||
|
||||
final List<MultiDeBruijnVertex> path = new ArrayList<>();
|
||||
|
||||
MultiDeBruijnVertex v = start;
|
||||
while ( v != null ) {
|
||||
path.add(v);
|
||||
v = (direction == TraversalDirection.downwards ? getNextReferenceVertex(v) : getPrevReferenceVertex(v));
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
|
||||
/**
|
||||
* The base sequence for the given path.
|
||||
*
|
||||
* @param path the list of vertexes that make up the path
|
||||
* @param reverseIfSource if true and if we encounter a source node, then reverse the character sequence for that node
|
||||
* @return non-null sequence of bases corresponding to the given path
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
public byte[] getBasesForPath(final List<MultiDeBruijnVertex> path, final boolean reverseIfSource) {
|
||||
if ( path == null ) throw new IllegalArgumentException("Path cannot be null");
|
||||
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
for ( final MultiDeBruijnVertex v : path ) {
|
||||
if ( isSource(v) ) {
|
||||
final String seq = v.getSequenceString();
|
||||
sb.append(reverseIfSource ? new StringBuilder(seq).reverse().toString() : seq);
|
||||
} else {
|
||||
sb.append((char)v.getSuffix());
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString().getBytes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the index of the best extent of the prefix match between the provided paths, for dangling head merging.
|
||||
* Assumes that path1.length >= maxIndex and path2.length >= maxIndex.
|
||||
*
|
||||
* @param path1 the first path
|
||||
* @param path2 the second path
|
||||
* @param maxIndex the maximum index to traverse (not inclusive)
|
||||
* @return the index of the ideal prefix match or -1 if it cannot find one, must be less than maxIndex
|
||||
*/
|
||||
protected static int bestPrefixMatch(final byte[] path1, final byte[] path2, final int maxIndex) {
|
||||
int mismatches = 0;
|
||||
int index = 0;
|
||||
int lastGoodIndex = -1;
|
||||
while ( index < maxIndex ) {
|
||||
if ( path1[index] != path2[index] ) {
|
||||
if ( ++mismatches > MAXIMUM_MISMATCHES_IN_DANGLING_HEAD_MERGE )
|
||||
return lastGoodIndex;
|
||||
lastGoodIndex = index;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
// if we got here then we hit the max index
|
||||
return lastGoodIndex;
|
||||
}
|
||||
|
||||
/**
 * Extend the dangling path of the merge result by borrowing bases from the reference path so
 * that the dangling head chain becomes long enough to be merged into the graph.
 *
 * NOTE(review): assumes danglingHeadMergeResult.danglingPath ends with the current dangling
 * source vertex and that referencePath is ordered in the same direction -- confirm with callers.
 *
 * @param danglingHeadMergeResult the dangling head merge data to extend; its danglingPath is modified in place
 * @param numNodesToExtend the number of new vertices to create, each borrowing one base from the reference
 * @return true if the path was extended, false if the reference path is too short to supply the bases
 */
protected boolean extendDanglingPathAgainstReference(final DanglingChainMergeHelper danglingHeadMergeResult, final int numNodesToExtend) {

    // the dangling source sits at the end of the dangling path; we need a reference vertex far
    // enough along to supply numNodesToExtend extra bases
    final int indexOfLastDanglingNode = danglingHeadMergeResult.danglingPath.size() - 1;
    final int indexOfRefNodeToUse = indexOfLastDanglingNode + numNodesToExtend;
    if ( indexOfRefNodeToUse >= danglingHeadMergeResult.referencePath.size() )
        return false;

    // build the extended sequence: numNodesToExtend reference bases followed by the dangling source's kmer
    final MultiDeBruijnVertex danglingSource = danglingHeadMergeResult.danglingPath.remove(indexOfLastDanglingNode);
    final StringBuilder sb = new StringBuilder();
    final byte[] refSourceSequence = danglingHeadMergeResult.referencePath.get(indexOfRefNodeToUse).getSequence();
    for ( int i = 0; i < numNodesToExtend; i++ )
        sb.append((char)refSourceSequence[i]);
    sb.append(danglingSource.getSequenceString());
    final byte[] sequenceToExtend = sb.toString().getBytes();

    // clean up the source and edge
    final MultiSampleEdge sourceEdge = outgoingEdgeOf(danglingSource);
    MultiDeBruijnVertex prevV = getEdgeTarget(sourceEdge);
    removeEdge(danglingSource, prevV);   // the old source vertex is replaced by the new chain built below

    // extend the path: create one new vertex per borrowed base, walking backwards so that each
    // new vertex links downstream to the previously created one; new edges copy the multiplicity
    // of the original source edge
    for ( int i = numNodesToExtend; i > 0; i-- ) {
        final MultiDeBruijnVertex newV = new MultiDeBruijnVertex(Arrays.copyOfRange(sequenceToExtend, i, i+kmerSize));
        addVertex(newV);
        final MultiSampleEdge newE = addEdge(newV, prevV);
        newE.setMultiplicity(sourceEdge.getMultiplicity());
        danglingHeadMergeResult.danglingPath.add(newV);
        prevV = newV;
    }

    return true;
}
|
||||
}
|
||||
|
|
@ -391,10 +391,9 @@ public class HaplotypeGraph extends ReadThreadingGraph {
|
|||
graphWriter.println("}");
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Pair<MultiDeBruijnVertex,Integer> findStart(final SequenceForKmers seqForKmers) {
|
||||
return getOrCreateKmerVertex(seqForKmers.sequence, 0, true);
|
||||
protected int findStart(final SequenceForKmers seqForKmers) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -186,9 +186,10 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine {
|
|||
// tails that we'll ultimately just trim away anyway, as the dangling tail edges have weight of 1
|
||||
rtgraph.pruneLowWeightChains(pruneFactor);
|
||||
|
||||
// look at all chains in the graph that terminate in a non-ref node (dangling sinks) and see if
|
||||
// look at all chains in the graph that terminate in a non-ref node (dangling sources and sinks) and see if
|
||||
// we can recover them by merging some N bases from the chain back into the reference
|
||||
if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(pruneFactor);
|
||||
if ( recoverDanglingHeads ) rtgraph.recoverDanglingHeads(pruneFactor);
|
||||
|
||||
// remove all heading and trailing paths
|
||||
if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef();
|
||||
|
|
|
|||
|
|
@ -46,21 +46,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.KMerCounter;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment;
|
||||
import org.broadinstitute.sting.utils.smithwaterman.SWParameterSet;
|
||||
import org.broadinstitute.sting.utils.smithwaterman.SmithWaterman;
|
||||
import org.jgrapht.EdgeFactory;
|
||||
import org.jgrapht.alg.CycleDetector;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -68,28 +59,7 @@ import java.util.*;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSampleEdge> implements KmerSearchableGraph<MultiDeBruijnVertex,MultiSampleEdge> {
|
||||
|
||||
/**
|
||||
* Edge factory that encapsulates the numPruningSamples assembly parameter
|
||||
*/
|
||||
private static class MyEdgeFactory implements EdgeFactory<MultiDeBruijnVertex, MultiSampleEdge> {
|
||||
final int numPruningSamples;
|
||||
|
||||
public MyEdgeFactory(int numPruningSamples) {
|
||||
this.numPruningSamples = numPruningSamples;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MultiSampleEdge createEdge(final MultiDeBruijnVertex sourceVertex, final MultiDeBruijnVertex targetVertex) {
|
||||
return new MultiSampleEdge(false, 1, numPruningSamples);
|
||||
}
|
||||
|
||||
public MultiSampleEdge createEdge(final boolean isRef, final int multiplicity) {
|
||||
return new MultiSampleEdge(isRef, multiplicity, numPruningSamples);
|
||||
}
|
||||
|
||||
}
|
||||
public class ReadThreadingGraph extends DanglingChainMergingGraph implements KmerSearchableGraph<MultiDeBruijnVertex,MultiSampleEdge> {
|
||||
|
||||
private final static Logger logger = Logger.getLogger(ReadThreadingGraph.class);
|
||||
|
||||
|
|
@ -97,9 +67,6 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
private final static boolean WRITE_GRAPH = false;
|
||||
private final static boolean DEBUG_NON_UNIQUE_CALC = false;
|
||||
|
||||
private final static int MAX_CIGAR_COMPLEXITY = 3;
|
||||
private final static int MIN_DANGLING_TAIL_LENGTH = 5; // SNP + 3 stabilizing nodes + the LCA
|
||||
|
||||
/** for debugging info printing */
|
||||
private static int counter = 0;
|
||||
|
||||
|
|
@ -132,13 +99,12 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
// state variables, initialized in resetToInitialState()
|
||||
// --------------------------------------------------------------------------------
|
||||
private Kmer refSource;
|
||||
protected boolean alreadyBuilt;
|
||||
|
||||
/**
|
||||
* Constructs an empty read-threading-graph provided the kmerSize.
|
||||
* @param kmerSize 1 or greater.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code kmerSize} < 1.
|
||||
* @throws IllegalArgumentException if {@code kmerSize} < 1.
|
||||
*/
|
||||
public ReadThreadingGraph(final int kmerSize) {
|
||||
this(kmerSize, false, (byte)6, 1);
|
||||
|
|
@ -250,12 +216,11 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
* @param seqForKmers a non-null sequence
|
||||
*/
|
||||
private void threadSequence(final SequenceForKmers seqForKmers) {
|
||||
final Pair<MultiDeBruijnVertex,Integer> startingInfo = findStart(seqForKmers);
|
||||
if ( startingInfo == null )
|
||||
final int uniqueStartPos = findStart(seqForKmers);
|
||||
if ( uniqueStartPos == -1 )
|
||||
return;
|
||||
|
||||
final MultiDeBruijnVertex startingVertex = startingInfo.getFirst();
|
||||
final int uniqueStartPos = startingInfo.getSecond();
|
||||
final MultiDeBruijnVertex startingVertex = getOrCreateKmerVertex(seqForKmers.sequence, uniqueStartPos);
|
||||
|
||||
// increase the counts of all edges incoming into the starting vertex supported by going back in sequence
|
||||
if ( increaseCountsBackwards )
|
||||
|
|
@ -278,177 +243,22 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
}
|
||||
|
||||
/**
 * Class to keep track of the important dangling tail merging data
 */
protected final class DanglingTailMergeResult {
    // the vertices of the dangling (non-reference) chain and of the reference chain it may merge into
    final List<MultiDeBruijnVertex> danglingPath, referencePath;
    // the base sequences spelled out by the two paths above
    final byte[] danglingPathString, referencePathString;
    // the Smith-Waterman alignment of the dangling bases against the reference bases
    final Cigar cigar;

    /**
     * @param danglingPath the vertices of the dangling chain
     * @param referencePath the vertices of the reference chain
     * @param danglingPathString the bases of the dangling path
     * @param referencePathString the bases of the reference path
     * @param cigar the alignment of dangling vs. reference bases
     */
    public DanglingTailMergeResult(final List<MultiDeBruijnVertex> danglingPath,
                                   final List<MultiDeBruijnVertex> referencePath,
                                   final byte[] danglingPathString,
                                   final byte[] referencePathString,
                                   final Cigar cigar) {
        this.danglingPath = danglingPath;
        this.referencePath = referencePath;
        this.danglingPathString = danglingPathString;
        this.referencePathString = referencePathString;
        this.cigar = cigar;
    }
}
|
||||
|
||||
/**
|
||||
* Attempt to attach vertex with out-degree == 0 to the graph
|
||||
* Find vertex and its position in seqForKmers where we should start assembling seqForKmers
|
||||
*
|
||||
* @param vertex the vertex to recover
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @return 1 if we successfully recovered the vertex and 0 otherwise
|
||||
* @param seqForKmers the sequence we want to thread into the graph
|
||||
* @return the position of the starting vertex in seqForKmer, or -1 if it cannot find one
|
||||
*/
|
||||
protected int recoverDanglingChain(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
if ( outDegreeOf(vertex) != 0 ) throw new IllegalStateException("Attempting to recover a dangling tail for " + vertex + " but it has out-degree > 0");
|
||||
|
||||
// generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths
|
||||
final DanglingTailMergeResult danglingTailMergeResult = generateCigarAgainstReferencePath(vertex, pruneFactor);
|
||||
|
||||
// if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path
|
||||
if ( danglingTailMergeResult == null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar) )
|
||||
protected int findStart(final SequenceForKmers seqForKmers) {
|
||||
if ( seqForKmers.isRef )
|
||||
return 0;
|
||||
|
||||
// merge
|
||||
return mergeDanglingTail(danglingTailMergeResult);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether the provided cigar is okay to merge into the reference path
|
||||
*
|
||||
* @param cigar the cigar to analyze
|
||||
* @return true if it's okay to merge, false otherwise
|
||||
*/
|
||||
protected boolean cigarIsOkayToMerge(final Cigar cigar) {
|
||||
|
||||
final List<CigarElement> elements = cigar.getCigarElements();
|
||||
final int numElements = elements.size();
|
||||
|
||||
// don't allow more than a couple of different ops
|
||||
if ( numElements > MAX_CIGAR_COMPLEXITY )
|
||||
return false;
|
||||
|
||||
// the last element must be an M
|
||||
if ( elements.get(numElements - 1).getOperator() != CigarOperator.M )
|
||||
return false;
|
||||
|
||||
// TODO -- do we want to check whether the Ms mismatch too much also?
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Actually merge the dangling tail if possible
 *
 * @param danglingTailMergeResult the result from generating a Cigar for the dangling tail against the reference
 * @return 1 if merge was successful, 0 otherwise
 * @throws IllegalArgumentException if the last element of the result's cigar is not an M
 */
protected int mergeDanglingTail(final DanglingTailMergeResult danglingTailMergeResult) {

    final List<CigarElement> elements = danglingTailMergeResult.cigar.getCigarElements();
    final CigarElement lastElement = elements.get(elements.size() - 1);
    if ( lastElement.getOperator() != CigarOperator.M )
        throw new IllegalArgumentException("The last Cigar element must be an M");

    // find how many bases at the end of the dangling tail exactly match the reference suffix,
    // capped by the length of the trailing M element; without any matching suffix there is no
    // safe place to reconnect the tail
    final int lastRefIndex = danglingTailMergeResult.cigar.getReferenceLength() - 1;
    final int matchingSuffix = Math.min(GraphUtils.longestSuffixMatch(danglingTailMergeResult.referencePathString, danglingTailMergeResult.danglingPathString, lastRefIndex), lastElement.getLength());
    if ( matchingSuffix == 0 )
        return 0;

    // the dangling-path vertex just before the matching suffix begins (clamped to 0)
    final int altIndexToMerge = Math.max(danglingTailMergeResult.cigar.getReadLength() - matchingSuffix - 1, 0);

    // there is an important edge condition that we need to handle here: Smith-Waterman correctly calculates that there is a
    // deletion, that deletion is left-aligned such that the LCA node is part of that deletion, and the rest of the dangling
    // tail is a perfect match to the suffix of the reference path. In this case we need to push the reference index to merge
    // down one position so that we don't incorrectly cut a base off of the deletion.
    final boolean firstElementIsDeletion = elements.get(0).getOperator() == CigarOperator.D;
    final boolean mustHandleLeadingDeletionCase = firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
    final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);

    // reconnect the dangling tail into the reference path via a new non-reference edge
    addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));

    return 1;
}
|
||||
|
||||
/**
|
||||
* Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the
|
||||
* provided vertex is the sink) and the reference path.
|
||||
*
|
||||
* @param vertex the sink of the dangling tail
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @return a SmithWaterman object which can be null if no proper alignment could be generated
|
||||
*/
|
||||
protected DanglingTailMergeResult generateCigarAgainstReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
|
||||
// find the lowest common ancestor path between vertex and the reference sink if available
|
||||
final List<MultiDeBruijnVertex> altPath = findPathToLowestCommonAncestorOfReference(vertex, pruneFactor);
|
||||
if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH )
|
||||
return null;
|
||||
|
||||
// now get the reference path from the LCA
|
||||
final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0));
|
||||
|
||||
// create the Smith-Waterman strings to use
|
||||
final byte[] refBases = getBasesForPath(refPath);
|
||||
final byte[] altBases = getBasesForPath(altPath);
|
||||
|
||||
// run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting)
|
||||
final SmithWaterman alignment = new SWPairwiseAlignment(refBases, altBases, SWParameterSet.STANDARD_NGS, SWPairwiseAlignment.OVERHANG_STRATEGY.LEADING_INDEL);
|
||||
return new DanglingTailMergeResult(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the path upwards in the graph from this vertex to the reference sequence, including the lowest common ancestor vertex.
|
||||
* Note that nodes are excluded if their pruning weight is less than the pruning factor.
|
||||
*
|
||||
* @param vertex the original vertex
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
* @return the path if it can be determined or null if this vertex either doesn't merge onto the reference path or
|
||||
* has an ancestor with multiple incoming edges before hitting the reference path
|
||||
*/
|
||||
protected List<MultiDeBruijnVertex> findPathToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) {
|
||||
final LinkedList<MultiDeBruijnVertex> path = new LinkedList<>();
|
||||
|
||||
MultiDeBruijnVertex v = vertex;
|
||||
while ( ! isReferenceNode(v) && inDegreeOf(v) == 1 ) {
|
||||
final MultiSampleEdge edge = incomingEdgeOf(v);
|
||||
// if it has too low a weight, don't use it (or previous vertexes) for the path
|
||||
if ( edge.getPruningMultiplicity() < pruneFactor )
|
||||
path.clear();
|
||||
// otherwise it is safe to use
|
||||
else
|
||||
path.addFirst(v);
|
||||
v = getEdgeSource(edge);
|
||||
}
|
||||
path.addFirst(v);
|
||||
|
||||
return isReferenceNode(v) ? path : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the path downwards in the graph from this vertex to the reference sink, including this vertex
|
||||
*
|
||||
* @param start the reference vertex to start from
|
||||
* @return the path (non-null, non-empty)
|
||||
*/
|
||||
protected List<MultiDeBruijnVertex> getReferencePath(final MultiDeBruijnVertex start) {
|
||||
if ( ! isReferenceNode(start) ) throw new IllegalArgumentException("Cannot construct the reference path from a vertex that is not on that path");
|
||||
|
||||
final List<MultiDeBruijnVertex> path = new ArrayList<>();
|
||||
|
||||
MultiDeBruijnVertex v = start;
|
||||
while ( v != null ) {
|
||||
path.add(v);
|
||||
v = getNextReferenceVertex(v);
|
||||
for ( int i = seqForKmers.start; i < seqForKmers.stop - kmerSize; i++ ) {
|
||||
final Kmer kmer1 = new Kmer(seqForKmers.sequence, i, kmerSize);
|
||||
if ( !nonUniqueKmers.contains(kmer1) )
|
||||
return i;
|
||||
}
|
||||
|
||||
return path;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -526,26 +336,6 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
return nonUniqueKmers.size() * 4 > uniqueKmers.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Try to recover dangling tails
|
||||
*
|
||||
* @param pruneFactor the prune factor to use in ignoring chain pieces
|
||||
*/
|
||||
public void recoverDanglingTails(final int pruneFactor) {
|
||||
if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built");
|
||||
|
||||
int attempted = 0;
|
||||
int nRecovered = 0;
|
||||
for ( final MultiDeBruijnVertex v : vertexSet() ) {
|
||||
if ( outDegreeOf(v) == 0 && ! isRefSink(v) ) {
|
||||
attempted++;
|
||||
nRecovered += recoverDanglingChain(v, pruneFactor);
|
||||
}
|
||||
}
|
||||
|
||||
if ( debugGraphTransformations ) logger.info("Recovered " + nRecovered + " of " + attempted + " dangling tails");
|
||||
}
|
||||
|
||||
/** structure that keeps track of the non-unique kmers for a given kmer size */
|
||||
private static class NonUniqueResult {
|
||||
final Set<Kmer> nonUniques;
|
||||
|
|
@ -568,7 +358,7 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
*/
|
||||
protected NonUniqueResult determineKmerSizeAndNonUniques(final int minKmerSize, final int maxKmerSize) {
|
||||
final Collection<SequenceForKmers> withNonUniques = getAllPendingSequences();
|
||||
final Set<Kmer> nonUniqueKmers = new HashSet<Kmer>();
|
||||
final Set<Kmer> nonUniqueKmers = new HashSet<>();
|
||||
|
||||
// go through the sequences and determine which kmers aren't unique within each read
|
||||
int kmerSize = minKmerSize;
|
||||
|
|
@ -606,7 +396,7 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
* @return non-null Collection
|
||||
*/
|
||||
private Collection<SequenceForKmers> getAllPendingSequences() {
|
||||
final LinkedList<SequenceForKmers> result = new LinkedList<SequenceForKmers>();
|
||||
final LinkedList<SequenceForKmers> result = new LinkedList<>();
|
||||
for ( final List<SequenceForKmers> oneSampleWorth : pending.values() ) result.addAll(oneSampleWorth);
|
||||
return result;
|
||||
}
|
||||
|
|
@ -642,7 +432,7 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
buildGraphIfNecessary();
|
||||
|
||||
final SeqGraph seqGraph = new SeqGraph(kmerSize);
|
||||
final Map<MultiDeBruijnVertex, SeqVertex> vertexMap = new HashMap<MultiDeBruijnVertex, SeqVertex>();
|
||||
final Map<MultiDeBruijnVertex, SeqVertex> vertexMap = new HashMap<>();
|
||||
|
||||
|
||||
// create all of the equivalent seq graph vertices
|
||||
|
|
@ -683,52 +473,16 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find vertex and its position in seqForKmers where we should start assembling seqForKmers
|
||||
*
|
||||
* @param seqForKmers the sequence we want to thread into the graph
|
||||
* @return a pair of the starting vertex and its position in seqForKmer
|
||||
*/
|
||||
protected Pair<MultiDeBruijnVertex, Integer> findStart(final SequenceForKmers seqForKmers) {
|
||||
final int uniqueStartPos = seqForKmers.isRef ? 0 : findUniqueStartPosition(seqForKmers.sequence, seqForKmers.start, seqForKmers.stop);
|
||||
|
||||
if ( uniqueStartPos == -1 )
|
||||
return null;
|
||||
|
||||
return getOrCreateKmerVertex(seqForKmers.sequence, uniqueStartPos, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a starting point in sequence that begins a unique kmer among all kmers in the graph
|
||||
* @param sequence the sequence of bases
|
||||
* @param start the first base to use in sequence
|
||||
* @param stop the last base to use in sequence
|
||||
* @return the index into sequence that begins a unique kmer of size kmerSize, or -1 if none could be found
|
||||
*/
|
||||
private int findUniqueStartPosition(final byte[] sequence, final int start, final int stop) {
|
||||
for ( int i = start; i < stop - kmerSize; i++ ) {
|
||||
final Kmer kmer1 = new Kmer(sequence, i, kmerSize);
|
||||
if ( uniqueKmers.containsKey(kmer1) )
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the vertex for the kmer in sequence starting at start
|
||||
* @param sequence the sequence
|
||||
* @param start the position of the kmer start
|
||||
* @param allowRefSource if true, we will allow matches to the kmer that represents the reference starting kmer
|
||||
* @return a non-null vertex
|
||||
*/
|
||||
protected Pair<MultiDeBruijnVertex, Integer> getOrCreateKmerVertex(final byte[] sequence, final int start, final boolean allowRefSource) {
|
||||
private MultiDeBruijnVertex getOrCreateKmerVertex(final byte[] sequence, final int start) {
|
||||
final Kmer kmer = new Kmer(sequence, start, kmerSize);
|
||||
final MultiDeBruijnVertex vertex = getUniqueKmerVertex(kmer, allowRefSource);
|
||||
if ( vertex != null ) {
|
||||
return new Pair<>(vertex, start);
|
||||
} else {
|
||||
return new Pair<>(createVertex(kmer), start);
|
||||
}
|
||||
final MultiDeBruijnVertex vertex = getUniqueKmerVertex(kmer, true);
|
||||
return ( vertex != null ) ? vertex : createVertex(kmer);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -878,11 +632,11 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
|
||||
|
||||
/**
|
||||
* Constructs a read-threading-graph for a string representation.
|
||||
* Constructs a read-threading-graph for a string representation.
|
||||
*
|
||||
* <p>
|
||||
* Note: only used for testing.
|
||||
* Checkout {@link HaplotypeGraphUnitTest} for examples.
|
||||
* Checkout {@see HaplotypeGraphUnitTest} for examples.
|
||||
* </p>
|
||||
* @param s the string representation of the graph {@code null}.
|
||||
*/
|
||||
|
|
@ -913,7 +667,7 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
*
|
||||
* <p>
|
||||
* Note: this is done just for testing purposes.
|
||||
* Checkout {@link HaplotypeGraphUnitTest} for examples.
|
||||
* Checkout {@see HaplotypeGraphUnitTest} for examples.
|
||||
* </p>
|
||||
* @param str the string representation.
|
||||
*/
|
||||
|
|
@ -934,9 +688,9 @@ public class ReadThreadingGraph extends BaseGraph<MultiDeBruijnVertex, MultiSamp
|
|||
if (referenceFound) {
|
||||
if (isReference)
|
||||
throw new IllegalArgumentException("there are two reference paths");
|
||||
|
||||
} else
|
||||
referenceFound |= isReference;
|
||||
} else if ( isReference ) {
|
||||
referenceFound = true;
|
||||
}
|
||||
|
||||
// Divide each path into its elements getting a list of sequences and labels if applies:
|
||||
final String elementsString = pathMatcher.group(3);
|
||||
|
|
|
|||
|
|
@ -358,7 +358,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
final File outputVCFNoQD = executeTest("testQualByDepth calling without QD", specNoQD).getFirst().get(0);
|
||||
|
||||
final String baseAnn = String.format("-T VariantAnnotator -R %s -V %s", REF, outputVCFNoQD.getAbsolutePath()) + " --no_cmdline_in_header -o %s -L 20:10130000-10134800 -A QualByDepth";
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("4ccdbebcfd02be87ae5b4ad94666f011"));
|
||||
final WalkerTestSpec specAnn = new WalkerTestSpec(baseAnn, 1, Arrays.asList("388200d107fb47326df78a971a52698f"));
|
||||
specAnn.disableShadowBCF();
|
||||
final File outputVCFAnn = executeTest("testQualByDepth re-annotation of QD", specAnn).getFirst().get(0);
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex1() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "65c316f1f3987d7bc94e887999920d45");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7278afd47e5851c954359441cac2f0b8");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -88,7 +88,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"724a05b7df716647014f29c0fe86e071");
|
||||
"cbdd34c454d69b266e3681ddfc33c7a3");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "96f299a5cf411900b8eda3845c3ce465");
|
||||
HCTest(NA12878_BAM, "", "c208ef58d464465c68b5c26501122ad7");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -99,7 +99,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerGraphBasedSingleSample() {
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "83fe0694621bc1e0240f6f79eb6d6999");
|
||||
HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "049ba1794a1ce2b15566bb1e9431fccf");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("0864904254b2fa757991f8c2dac4932d"));
|
||||
Arrays.asList("0998be22d7af4372247f5a0338f9446b"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("df1f9410d23a550a143531ac0891f1dc"));
|
||||
Arrays.asList("1aeed297a3cb41940d83eac499a2ce07"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestConservativePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("616cc63d5a78765145914457dec475b0"));
|
||||
Arrays.asList("dcb38cb9280f2c3059a09d323db1c633"));
|
||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -292,19 +292,4 @@ public class BaseGraphUnitTest extends BaseTest {
|
|||
final Set<SeqVertex> expectedSet = expected == null ? Collections.<SeqVertex>emptySet() : new HashSet<SeqVertex>(Arrays.asList(expected));
|
||||
Assert.assertEquals(actualSet, expectedSet);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testGetBases() {
|
||||
|
||||
final int kmerSize = 4;
|
||||
final String testString = "AATGGGGGCAATACTA";
|
||||
|
||||
final List<DeBruijnVertex> vertexes = new ArrayList<>();
|
||||
for ( int i = 0; i <= testString.length() - kmerSize; i++ ) {
|
||||
vertexes.add(new DeBruijnVertex(testString.substring(i, i + kmerSize)));
|
||||
}
|
||||
|
||||
final String result = new String(new TestGraph().getBasesForPath(vertexes));
|
||||
Assert.assertEquals(result, testString.substring(kmerSize - 1));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,230 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class DanglingChainMergingGraphUnitTest extends BaseTest {
|
||||
|
||||
public static byte[] getBytes(final String alignment) {
|
||||
return alignment.replace("-","").getBytes();
|
||||
}
|
||||
|
||||
@DataProvider(name = "DanglingTails")
|
||||
public Object[][] makeDanglingTailsData() {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// add 1M to the expected CIGAR because it includes the previous (common) base too
|
||||
tests.add(new Object[]{"AAAAAAAAAA", "CAAA", "5M", true, 3}); // incomplete haplotype
|
||||
tests.add(new Object[]{"AAAAAAAAAA", "CAAAAAAAAAA", "1M1I10M", true, 10}); // insertion
|
||||
tests.add(new Object[]{"CCAAAAAAAAAA", "AAAAAAAAAA", "1M2D10M", true, 10}); // deletion
|
||||
tests.add(new Object[]{"AAAAAAAA", "CAAAAAAA", "9M", true, 7}); // 1 snp
|
||||
tests.add(new Object[]{"AAAAAAAA", "CAAGATAA", "9M", true, 2}); // several snps
|
||||
tests.add(new Object[]{"AAAAA", "C", "1M4D1M", false, -1}); // funky SW alignment
|
||||
tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", false, 1}); // very little data
|
||||
tests.add(new Object[]{"AAAAAAA", "CAAAAAC", "8M", true, -1}); // ends in mismatch
|
||||
tests.add(new Object[]{"AAAAAA", "CGAAAACGAA", "1M2I4M2I2M", false, 0}); // alignment is too complex
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
/**
 * End-to-end test of dangling tail recovery: threads an alt haplotype that diverges
 * from the reference at its tail, verifies that exactly one non-reference sink exists,
 * checks the Smith-Waterman alignment of the dangling chain against the downward
 * reference path, and confirms that merging back into the graph happens (or is
 * refused) as expected.
 *
 * @param refEnd                     suffix appended to the common prefix to form the reference haplotype
 * @param altEnd                     suffix appended to the common prefix to form the alt haplotype
 * @param cigar                      expected cigar from the SW alignment (includes 1M for the shared preceding base)
 * @param cigarIsGood                whether that cigar should be accepted for merging
 * @param mergePointDistanceFromSink expected number of edges from the alt sink back to
 *                                   the merge point; -1 means no merge is expected
 */
@Test(dataProvider = "DanglingTails")
public void testDanglingTails(final String refEnd,
                              final String altEnd,
                              final String cigar,
                              final boolean cigarIsGood,
                              final int mergePointDistanceFromSink) {

    final int kmerSize = 15;

    // construct the haplotypes
    final String commonPrefix = "AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT";
    final String ref = commonPrefix + refEnd;
    final String alt = commonPrefix + altEnd;

    // create the graph and populate it with the reference and a single alt read
    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize);
    rtgraph.addSequence("ref", ref.getBytes(), true);
    final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
    rtgraph.addRead(read);
    rtgraph.buildGraphIfNecessary();

    // confirm that we have just a single dangling tail
    MultiDeBruijnVertex altSink = null;
    for ( final MultiDeBruijnVertex v : rtgraph.vertexSet() ) {
        if ( rtgraph.isSink(v) && !rtgraph.isReferenceNode(v) ) {
            Assert.assertTrue(altSink == null, "We found more than one non-reference sink");
            altSink = v;
        }
    }

    Assert.assertTrue(altSink != null, "We did not find a non-reference sink");

    // confirm that the SW alignment agrees with our expectations
    final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink, 0);

    // a null result means no usable alignment could be generated at all
    if ( result == null ) {
        Assert.assertFalse(cigarIsGood);
        return;
    }

    Assert.assertTrue(cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString());

    // confirm that the goodness of the cigar agrees with our expectations
    Assert.assertEquals(rtgraph.cigarIsOkayToMerge(result.cigar, false, true), cigarIsGood);

    // confirm that the tail merging works as expected
    if ( cigarIsGood ) {
        final int mergeResult = rtgraph.mergeDanglingTail(result);
        Assert.assertTrue(mergeResult == 1 || mergePointDistanceFromSink == -1);

        // confirm that we created the appropriate edge: walk back from the sink to the
        // expected merge point, which must be the only branch point along the way
        if ( mergePointDistanceFromSink >= 0 ) {
            MultiDeBruijnVertex v = altSink;
            for ( int i = 0; i < mergePointDistanceFromSink; i++ ) {
                if ( rtgraph.inDegreeOf(v) != 1 )
                    Assert.fail("Encountered vertex with multiple edges");
                v = rtgraph.getEdgeSource(rtgraph.incomingEdgeOf(v));
            }
            Assert.assertTrue(rtgraph.outDegreeOf(v) > 1);
        }
    }
}
|
||||
|
||||
/**
 * Verifies that getBasesForPath reconstructs the original sequence when walking the
 * full reference path of a ReadThreadingGraph with expandSource enabled (the second
 * argument false here refers to the direction/trim flag used by the implementation —
 * NOTE(review): confirm its exact meaning against ReadThreadingGraph.getBasesForPath).
 */
@Test(enabled = true)
public void testGetBasesForPath() {

    final int kmerSize = 4;
    final String testString = "AATGGGGCAATACTA";

    // thread the test string through the graph as the reference
    final ReadThreadingGraph graph = new ReadThreadingGraph(kmerSize);
    graph.addSequence(testString.getBytes(), true);
    graph.buildGraphIfNecessary();

    // collect the entire reference path from source to sink
    final List<MultiDeBruijnVertex> vertexes = new ArrayList<>();
    MultiDeBruijnVertex v = graph.getReferenceSourceVertex();
    while ( v != null ) {
        vertexes.add(v);
        v = graph.getNextReferenceVertex(v);
    }

    // the path should spell out the complete original sequence
    final String result = new String(graph.getBasesForPath(vertexes, false));
    Assert.assertEquals(result, testString);
}
|
||||
|
||||
@DataProvider(name = "DanglingHeads")
|
||||
public Object[][] makeDanglingHeadsData() {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// add 1M to the expected CIGAR because it includes the last (common) base too
|
||||
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "AAYCGGTTACGT", "8M", true}); // 1 snp
|
||||
tests.add(new Object[]{"XXXAACCGGTTACGT", "XAAACCGGTTACGT", "7M", false}); // 1 snp
|
||||
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "XAACGGTTACGT", "4M1D4M", false}); // deletion
|
||||
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "AYYCGGTTACGT", "8M", true}); // 2 snps
|
||||
tests.add(new Object[]{"XXXXXXXAACCGGTTACGTAA", "AYCYGGTTACGTAA", "9M", true}); // 2 snps
|
||||
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "AYCGGTTACGT", "7M", true}); // very little data
|
||||
tests.add(new Object[]{"XXXXXXXAACCGGTTACGT", "YCCGGTTACGT", "6M", true}); // begins in mismatch
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
/**
 * End-to-end test of dangling head recovery: threads an alt haplotype that diverges
 * from the reference at its start, verifies that exactly one non-reference source
 * exists, checks the Smith-Waterman alignment of the dangling chain against the
 * upward reference path, and confirms that merging creates a bubble in the final
 * sequence graph only when expected.
 *
 * @param ref            the reference haplotype
 * @param alt            the alt haplotype whose head dangles off the reference
 * @param cigar          expected cigar from the SW alignment (includes 1M for the shared last base)
 * @param shouldBeMerged whether the dangling head is expected to be merged back into the graph
 */
@Test(dataProvider = "DanglingHeads")
public void testDanglingHeads(final String ref,
                              final String alt,
                              final String cigar,
                              final boolean shouldBeMerged) {

    final int kmerSize = 5;

    // create the graph and populate it with the reference and a single alt read
    final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize);
    rtgraph.addSequence("ref", ref.getBytes(), true);
    final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
    rtgraph.addRead(read);
    rtgraph.buildGraphIfNecessary();

    // confirm that we have just a single dangling head
    MultiDeBruijnVertex altSource = null;
    for ( final MultiDeBruijnVertex v : rtgraph.vertexSet() ) {
        if ( rtgraph.isSource(v) && !rtgraph.isReferenceNode(v) ) {
            Assert.assertTrue(altSource == null, "We found more than one non-reference source");
            altSource = v;
        }
    }

    Assert.assertTrue(altSource != null, "We did not find a non-reference source");

    // confirm that the SW alignment agrees with our expectations
    final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstUpwardsReferencePath(altSource, 0);

    // a null result means no usable alignment could be generated at all
    if ( result == null ) {
        Assert.assertFalse(shouldBeMerged);
        return;
    }

    Assert.assertTrue(cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString());

    // confirm that the head merging works as expected
    final int mergeResult = rtgraph.mergeDanglingHead(result);
    Assert.assertTrue(mergeResult > 0 || !shouldBeMerged);

    // confirm that we created the appropriate bubble in the graph only if expected:
    // a successful merge yields 2 best paths (ref + alt), otherwise just the ref path
    rtgraph.cleanNonRefPaths();
    final SeqGraph seqGraph = rtgraph.convertToSequenceGraph();
    List<Path<SeqVertex,BaseEdge>> paths = new KBestPaths<SeqVertex,BaseEdge>().getKBestPaths(seqGraph, seqGraph.getReferenceSourceVertex(), seqGraph.getReferenceSinkVertex());
    Assert.assertEquals(paths.size(), shouldBeMerged ? 2 : 1);
}
|
||||
}
|
||||
|
|
@ -150,6 +150,29 @@ public class ReadThreadingAssemblerUnitTest extends BaseTest {
|
|||
assertSingleBubble(assembler, ref, "ACAGCTGA");
|
||||
}
|
||||
|
||||
/**
 * Tests assembly when the alt sequence mismatches the reference within the very first
 * kmer (the case dangling head recovery is meant to rescue): after simplification the
 * graph must still collapse to a single source and sink, and the alt path must survive
 * as a second best path alongside the reference.
 */
@Test(enabled = ! DEBUG)
public void testMismatchInFirstKmer() {
    final TestAssembler assembler = new TestAssembler(3);
    final String ref = "ACAACTGA";
    final String alt = "AGCTGA";
    assembler.addSequence(ref.getBytes(), true);
    assembler.addSequence(alt.getBytes(), false);

    final SeqGraph graph = assembler.assemble();
    graph.simplifyGraph();
    graph.removeSingletonOrphanVertices();
    final Set<SeqVertex> sources = graph.getSources();
    final Set<SeqVertex> sinks = graph.getSinks();

    // the merged graph must have exactly one entry and one exit, both on the reference
    Assert.assertEquals(sources.size(), 1);
    Assert.assertEquals(sinks.size(), 1);
    Assert.assertNotNull(graph.getReferenceSourceVertex());
    Assert.assertNotNull(graph.getReferenceSinkVertex());

    // two best paths expected: the reference and the rescued alt haplotype
    final List<Path<SeqVertex,BaseEdge>> paths = new KBestPaths<SeqVertex,BaseEdge>().getKBestPaths(graph);
    Assert.assertEquals(paths.size(), 2);
}
|
||||
|
||||
@Test(enabled = ! DEBUG)
|
||||
public void testStartInMiddle() {
|
||||
final TestAssembler assembler = new TestAssembler(3);
|
||||
|
|
|
|||
|
|
@ -216,88 +216,6 @@ public class ReadThreadingGraphUnitTest extends BaseTest {
|
|||
Assert.assertEquals(pathFinder.getKBestPaths(graph, length, graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex()).size(), 1);
|
||||
}
|
||||
|
||||
@DataProvider(name = "DanglingTails")
|
||||
public Object[][] makeDanglingTailsData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
// add 1M to the expected CIGAR because it includes the previous (common) base too
|
||||
tests.add(new Object[]{"AAAAAAAAAA", "CAAA", "5M", true, 3}); // incomplete haplotype
|
||||
tests.add(new Object[]{"AAAAAAAAAA", "CAAAAAAAAAA", "1M1I10M", true, 10}); // insertion
|
||||
tests.add(new Object[]{"CCAAAAAAAAAA", "AAAAAAAAAA", "1M2D10M", true, 10}); // deletion
|
||||
tests.add(new Object[]{"AAAAAAAA", "CAAAAAAA", "9M", true, 7}); // 1 snp
|
||||
tests.add(new Object[]{"AAAAAAAA", "CAAGATAA", "9M", true, 2}); // several snps
|
||||
tests.add(new Object[]{"AAAAA", "C", "1M4D1M", false, -1}); // funky SW alignment
|
||||
tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", false, 1}); // very little data
|
||||
tests.add(new Object[]{"AAAAAAA", "CAAAAAC", "8M", true, -1}); // ends in mismatch
|
||||
tests.add(new Object[]{"AAAAAA", "CGAAAACGAA", "1M2I4M2I2M", false, 0}); // alignment is too complex
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "DanglingTails", enabled = !DEBUG)
|
||||
public void testDanglingTails(final String refEnd,
|
||||
final String altEnd,
|
||||
final String cigar,
|
||||
final boolean cigarIsGood,
|
||||
final int mergePointDistanceFromSink) {
|
||||
|
||||
final int kmerSize = 15;
|
||||
|
||||
// construct the haplotypes
|
||||
final String commonPrefix = "AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT";
|
||||
final String ref = commonPrefix + refEnd;
|
||||
final String alt = commonPrefix + altEnd;
|
||||
|
||||
// create the graph and populate it
|
||||
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize);
|
||||
rtgraph.addSequence("ref", ref.getBytes(), true);
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(alt.getBytes(), Utils.dupBytes((byte) 30, alt.length()), alt.length() + "M");
|
||||
rtgraph.addRead(read);
|
||||
rtgraph.buildGraphIfNecessary();
|
||||
|
||||
// confirm that we have just a single dangling tail
|
||||
MultiDeBruijnVertex altSink = null;
|
||||
for ( final MultiDeBruijnVertex v : rtgraph.vertexSet() ) {
|
||||
if ( rtgraph.isSink(v) && !rtgraph.isReferenceNode(v) ) {
|
||||
Assert.assertTrue(altSink == null, "We found more than one non-reference sink");
|
||||
altSink = v;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertTrue(altSink != null, "We did not find a non-reference sink");
|
||||
|
||||
// confirm that the SW alignment agrees with our expectations
|
||||
final ReadThreadingGraph.DanglingTailMergeResult result = rtgraph.generateCigarAgainstReferencePath(altSink, 0);
|
||||
|
||||
if ( result == null ) {
|
||||
Assert.assertFalse(cigarIsGood);
|
||||
return;
|
||||
}
|
||||
|
||||
Assert.assertTrue(cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString());
|
||||
|
||||
// confirm that the goodness of the cigar agrees with our expectations
|
||||
Assert.assertEquals(rtgraph.cigarIsOkayToMerge(result.cigar), cigarIsGood);
|
||||
|
||||
// confirm that the tail merging works as expected
|
||||
if ( cigarIsGood ) {
|
||||
final int mergeResult = rtgraph.mergeDanglingTail(result);
|
||||
Assert.assertTrue(mergeResult == 1 || mergePointDistanceFromSink == -1);
|
||||
|
||||
// confirm that we created the appropriate edge
|
||||
if ( mergePointDistanceFromSink >= 0 ) {
|
||||
MultiDeBruijnVertex v = altSink;
|
||||
for ( int i = 0; i < mergePointDistanceFromSink; i++ ) {
|
||||
if ( rtgraph.inDegreeOf(v) != 1 )
|
||||
Assert.fail("Encountered vertex with multiple sources");
|
||||
v = rtgraph.getEdgeSource(rtgraph.incomingEdgeOf(v));
|
||||
}
|
||||
Assert.assertTrue(rtgraph.outDegreeOf(v) > 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// TODO -- update to use determineKmerSizeAndNonUniques directly
|
||||
// @DataProvider(name = "KmerSizeData")
|
||||
// public Object[][] makeKmerSizeDataProvider() {
|
||||
|
|
@ -340,5 +258,4 @@ public class ReadThreadingGraphUnitTest extends BaseTest {
|
|||
// assertNonUniques(assembler, expectedNonUniques.toArray(new String[]{}));
|
||||
// }
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue