Merge pull request #559 from broadinstitute/vrr_assembly_graph_edge_info_revise
Improved criteria to select best haplotypes out from the assembly graph.
This commit is contained in:
commit
c1b4390691
|
|
@ -213,16 +213,18 @@ public abstract class LocalAssemblyEngine {
|
||||||
final Map<SeqGraph,AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
|
final Map<SeqGraph,AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
|
||||||
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
|
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
|
||||||
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
|
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
|
||||||
returnHaplotypes.add( refHaplotype );
|
|
||||||
|
|
||||||
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
|
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
|
||||||
|
final ArrayList<KBestHaplotypeFinder> finders = new ArrayList<>(graphs.size());
|
||||||
|
|
||||||
for( final SeqGraph graph : graphs ) {
|
for( final SeqGraph graph : graphs ) {
|
||||||
final SeqVertex source = graph.getReferenceSourceVertex();
|
final SeqVertex source = graph.getReferenceSourceVertex();
|
||||||
final SeqVertex sink = graph.getReferenceSinkVertex();
|
final SeqVertex sink = graph.getReferenceSinkVertex();
|
||||||
if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph);
|
if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph);
|
||||||
final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph,source,sink);
|
final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph,source,sink);
|
||||||
|
finders.add(haplotypeFinder);
|
||||||
final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
|
final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
|
||||||
|
|
||||||
while (bestHaplotypes.hasNext()) {
|
while (bestHaplotypes.hasNext()) {
|
||||||
final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
|
final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
|
||||||
final Haplotype h = kBestHaplotype.haplotype();
|
final Haplotype h = kBestHaplotype.haplotype();
|
||||||
|
|
@ -256,9 +258,19 @@ public abstract class LocalAssemblyEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Make sure that the ref haplotype is amongst the return haplotypes and calculate its score as
|
||||||
if ( returnHaplotypes.size() < returnHaplotypes.size() )
|
// the first returned by any finder.
|
||||||
logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
|
if (!returnHaplotypes.contains(refHaplotype)) {
|
||||||
|
double refScore = Double.NaN;
|
||||||
|
for (final KBestHaplotypeFinder finder : finders) {
|
||||||
|
final double candidate = finder.score(refHaplotype);
|
||||||
|
if (Double.isNaN(candidate)) continue;
|
||||||
|
refScore = candidate;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
refHaplotype.setScore(refScore);
|
||||||
|
returnHaplotypes.add(refHaplotype);
|
||||||
|
}
|
||||||
|
|
||||||
if( debug ) {
|
if( debug ) {
|
||||||
if( returnHaplotypes.size() > 1 ) {
|
if( returnHaplotypes.size() > 1 ) {
|
||||||
|
|
|
||||||
|
|
@ -45,21 +45,21 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.PriorityQueue;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* K-best sub-haplotype finder that selects the best solutions out of a collection of sub-haplotype finders.
|
* K-best sub-haplotype finder that selects the best solutions out of a collection of sub-haplotype finders.
|
||||||
*
|
*
|
||||||
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
||||||
*/
|
*/
|
||||||
class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
class AggregatedSubHaplotypeFinder<F extends KBestSubHaplotypeFinder> implements KBestSubHaplotypeFinder {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Collection of subFinders that provided the actual solutions.
|
* Collection of subFinders that provided the actual solutions.
|
||||||
*/
|
*/
|
||||||
private final Collection<? extends KBestSubHaplotypeFinder> subFinders;
|
protected final Collection<F> subFinders;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Flag indicating whether the sub-finders have been processed or not.
|
* Flag indicating whether the sub-finders have been processed or not.
|
||||||
|
|
@ -89,17 +89,53 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||||
* Creates a new aggregated sub-haplotype finder given its sub-finders.
|
* Creates a new aggregated sub-haplotype finder given its sub-finders.
|
||||||
* @param finders set of sub-finders.
|
* @param finders set of sub-finders.
|
||||||
*/
|
*/
|
||||||
public AggregatedSubHaplotypeFinder(final Collection<? extends KBestSubHaplotypeFinder> finders) {
|
public AggregatedSubHaplotypeFinder(final Collection<F> finders) {
|
||||||
if (finders == null) throw new IllegalArgumentException("finder collection cannot be null");
|
if (finders == null) throw new IllegalArgumentException("finder collection cannot be null");
|
||||||
this.subFinders = finders;
|
this.subFinders = finders;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String id() {
|
||||||
|
final StringBuilder resultBuilder = new StringBuilder();
|
||||||
|
for (final KBestSubHaplotypeFinder subFinder : subFinders)
|
||||||
|
resultBuilder.append(subFinder.id());
|
||||||
|
return resultBuilder.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String label() {
|
||||||
|
return "<OR>";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||||
|
final int subFinderCount = subFinders.size();
|
||||||
|
final String edgeCost = String.format("%.2f",-Math.log10((double) subFinderCount));
|
||||||
|
final Set<Pair<? extends KBestSubHaplotypeFinder,String>> result = new LinkedHashSet<>(subFinderCount);
|
||||||
|
for (final KBestSubHaplotypeFinder subFinder : subFinders)
|
||||||
|
result.add(new Pair<>(subFinder,edgeCost));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getCount() {
|
public int getCount() {
|
||||||
processSubFindersIfNeeded();
|
processSubFindersIfNeeded();
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double score(final byte[] bases, final int offset, final int length) {
|
||||||
|
if (bases == null) throw new IllegalArgumentException("bases cannot be null");
|
||||||
|
if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
|
||||||
|
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||||
|
if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
|
||||||
|
for (final KBestSubHaplotypeFinder subFinder : subFinders) {
|
||||||
|
final double score = subFinder.score(bases,offset,length);
|
||||||
|
if (!Double.isNaN(score)) return score;
|
||||||
|
}
|
||||||
|
return Double.NaN;
|
||||||
|
}
|
||||||
|
|
||||||
private void processSubFindersIfNeeded() {
|
private void processSubFindersIfNeeded() {
|
||||||
if (processedSubFinders) return;
|
if (processedSubFinders) return;
|
||||||
|
|
||||||
|
|
@ -144,6 +180,11 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||||
return rankedSubHaplotype.get(k);
|
return rankedSubHaplotype.get(k);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isReference() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Custom implementation of {@link KBestHaplotype} to encapsulate sub-finder results.
|
* Custom implementation of {@link KBestHaplotype} to encapsulate sub-finder results.
|
||||||
*/
|
*/
|
||||||
|
|
@ -167,7 +208,7 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int score() {
|
public double score() {
|
||||||
return result.score();
|
return result.score();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,7 @@ import com.google.java.contract.Requires;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.jgrapht.EdgeFactory;
|
import org.jgrapht.EdgeFactory;
|
||||||
|
import org.jgrapht.alg.CycleDetector;
|
||||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -146,6 +147,39 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
|
||||||
return set;
|
return set;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert this kmer graph to a simple sequence graph.
|
||||||
|
*
|
||||||
|
* Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
|
||||||
|
* graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
|
||||||
|
*
|
||||||
|
* @return a newly allocated SequenceGraph
|
||||||
|
*/
|
||||||
|
public SeqGraph convertToSequenceGraph() {
|
||||||
|
|
||||||
|
final SeqGraph seqGraph = new SeqGraph(kmerSize);
|
||||||
|
final Map<V, SeqVertex> vertexMap = new HashMap<>();
|
||||||
|
|
||||||
|
|
||||||
|
// create all of the equivalent seq graph vertices
|
||||||
|
for ( final V dv : vertexSet() ) {
|
||||||
|
final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
|
||||||
|
sv.setAdditionalInfo(dv.additionalInfo());
|
||||||
|
vertexMap.put(dv, sv);
|
||||||
|
seqGraph.addVertex(sv);
|
||||||
|
}
|
||||||
|
|
||||||
|
// walk through the nodes and connect them to their equivalent seq vertices
|
||||||
|
for( final E e : edgeSet() ) {
|
||||||
|
final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
|
||||||
|
final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
|
||||||
|
//logger.info("Adding edge " + seqInV + " -> " + seqOutV);
|
||||||
|
seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return seqGraph;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pull out the additional sequence implied by traversing this node in the graph
|
* Pull out the additional sequence implied by traversing this node in the graph
|
||||||
* @param v the vertex from which to pull out the additional base sequence
|
* @param v the vertex from which to pull out the additional base sequence
|
||||||
|
|
@ -712,4 +746,13 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
|
||||||
if (!containsVertex(vertex)) return false;
|
if (!containsVertex(vertex)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks for the presence of directed cycles in the graph.
|
||||||
|
*
|
||||||
|
* @return {@code true} if the graph has cycles, {@code false} otherwise.
|
||||||
|
*/
|
||||||
|
public boolean hasCycles() {
|
||||||
|
return new CycleDetector<>(this).detectCycles();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,7 @@ public class CommonSuffixSplitter {
|
||||||
} else {
|
} else {
|
||||||
incomingTarget = prefixV;
|
incomingTarget = prefixV;
|
||||||
graph.addVertex(prefixV);
|
graph.addVertex(prefixV);
|
||||||
graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 0));
|
graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 1));
|
||||||
edgesToRemove.add(out);
|
edgesToRemove.add(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,11 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a trivial k-best sub haplotype finder with no solutions.
|
* Represents a trivial k-best sub haplotype finder with no solutions.
|
||||||
*
|
*
|
||||||
|
|
@ -65,6 +70,21 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||||
protected DeadEndKBestSubHaplotypeFinder() {
|
protected DeadEndKBestSubHaplotypeFinder() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String id() {
|
||||||
|
return "<DEAD>";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String label() {
|
||||||
|
return "<DEAD>";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||||
|
return Collections.emptySet();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getCount() {
|
public int getCount() {
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -77,4 +97,18 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||||
else
|
else
|
||||||
throw new IllegalArgumentException("k cannot be equal or greater to the haplotype count");
|
throw new IllegalArgumentException("k cannot be equal or greater to the haplotype count");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isReference() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double score(final byte[] bases, final int offset, final int length) {
|
||||||
|
if (bases == null) throw new IllegalArgumentException("bases cannot be null");
|
||||||
|
if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
|
||||||
|
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||||
|
if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
|
||||||
|
return Double.NaN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,12 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Trivial k-best sub-haplotype finder where the source and sink vertex are the same one.
|
* Trivial k-best sub-haplotype finder where the source and sink vertex are the same one.
|
||||||
*
|
*
|
||||||
|
|
@ -67,6 +73,21 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
|
||||||
singleHaplotypePath = new MyBestHaplotypePath(graph,vertex);
|
singleHaplotypePath = new MyBestHaplotypePath(graph,vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String id() {
|
||||||
|
return "v" + singleHaplotypePath.head().getId();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String label() {
|
||||||
|
return singleHaplotypePath.head().getSequenceString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||||
|
return Collections.emptySet();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getCount() {
|
public int getCount() {
|
||||||
return 1;
|
return 1;
|
||||||
|
|
@ -81,6 +102,24 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
|
||||||
return singleHaplotypePath;
|
return singleHaplotypePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isReference() {
|
||||||
|
return singleHaplotypePath.isReference();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double score(final byte[] bases, final int offset, final int length) {
|
||||||
|
if (bases == null) throw new IllegalArgumentException("bases cannot be null");
|
||||||
|
if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
|
||||||
|
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||||
|
if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
|
||||||
|
final byte[] vertexBases = singleHaplotypePath.head().getSequence();
|
||||||
|
if (length != vertexBases.length)
|
||||||
|
return Double.NaN;
|
||||||
|
else
|
||||||
|
return Utils.equalRange(bases, offset, vertexBases, 0, length)? 0 : Double.NaN;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Custom extension of {@link KBestHaplotype} that implements the single solution behaviour.
|
* Custom extension of {@link KBestHaplotype} that implements the single solution behaviour.
|
||||||
*/
|
*/
|
||||||
|
|
@ -120,7 +159,7 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int score() {
|
public double score() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
|
||||||
*
|
*
|
||||||
* @return 0 or greater.
|
* @return 0 or less.
|
||||||
*/
|
*/
|
||||||
public abstract int score();
|
public abstract double score();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indicates whether this result is the reference haplotype.
|
* Indicates whether this result is the reference haplotype.
|
||||||
|
|
@ -122,6 +122,8 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
|
||||||
public Haplotype haplotype() {
|
public Haplotype haplotype() {
|
||||||
if (haplotype != null) return haplotype;
|
if (haplotype != null) return haplotype;
|
||||||
haplotype = new Haplotype(bases(),isReference());
|
haplotype = new Haplotype(bases(),isReference());
|
||||||
|
if (score() > 0)
|
||||||
|
throw new IllegalStateException("score cannot be greater than 0: " + score());
|
||||||
haplotype.setScore(score());
|
haplotype.setScore(score());
|
||||||
return haplotype;
|
return haplotype;
|
||||||
}
|
}
|
||||||
|
|
@ -152,7 +154,35 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
|
||||||
*/
|
*/
|
||||||
public int compareTo(final KBestHaplotype other) {
|
public int compareTo(final KBestHaplotype other) {
|
||||||
if (other == null) throw new IllegalArgumentException("the other object cannot be null");
|
if (other == null) throw new IllegalArgumentException("the other object cannot be null");
|
||||||
return - 1 * (score() - other.score());
|
return - Double.compare(score(), other.score());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return haplotype().hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(final Object other) {
|
||||||
|
return other == null ? false: (other instanceof KBestHaplotype ? equals((KBestHaplotype)other) : false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return haplotype().toString() + " Score = " + score();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether both solutions are equal.
|
||||||
|
* <p>
|
||||||
|
* Both solutions are considered equal when the underlying haplotypes are equal. The path on the respective
|
||||||
|
* graph might differ though.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @return {@code true} iff both haplotypes are the same (considering the ref state).
|
||||||
|
*/
|
||||||
|
protected boolean equals(final KBestHaplotype other) {
|
||||||
|
return haplotype().equals(other.haplotype(),false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -45,8 +45,13 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||||
import org.jgrapht.alg.CycleDetector;
|
import org.jgrapht.alg.CycleDetector;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -233,7 +238,7 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public KBestHaplotype get(int index) {
|
public KBestHaplotype get(final int index) {
|
||||||
if (index < 0 || index >= size())
|
if (index < 0 || index >= size())
|
||||||
throw new IndexOutOfBoundsException();
|
throw new IndexOutOfBoundsException();
|
||||||
return topFinder.getKBest(index);
|
return topFinder.getKBest(index);
|
||||||
|
|
@ -305,28 +310,28 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
||||||
/**
|
/**
|
||||||
* Creates a finder from a vertex.
|
* Creates a finder from a vertex.
|
||||||
*
|
*
|
||||||
* @param source the source vertex for the finder.
|
* @param vertex the source vertex for the finder.
|
||||||
*
|
*
|
||||||
* @return never {@code null}, perhaps a finder that returns no haplotypes though.
|
* @return never {@code null}, perhaps a finder that returns no haplotypes though.
|
||||||
*/
|
*/
|
||||||
protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex source) {
|
protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex vertex) {
|
||||||
KBestSubHaplotypeFinder node = finderByVertex.get(source);
|
KBestSubHaplotypeFinder finder = finderByVertex.get(vertex);
|
||||||
if (node == null) {
|
if (finder == null) {
|
||||||
if (sinks.contains(source))
|
if (sinks.contains(vertex))
|
||||||
node = new EmptyPathHaplotypeFinderNode(graph,source);
|
finder = new EmptyPathHaplotypeFinderNode(graph,vertex);
|
||||||
else {
|
else {
|
||||||
final Set<BaseEdge> outgoingEdges = graph.outgoingEdgesOf(source);
|
final Set<BaseEdge> outgoingEdges = graph.outgoingEdgesOf(vertex);
|
||||||
if (outgoingEdges.isEmpty())
|
if (outgoingEdges.isEmpty())
|
||||||
node = DeadEndKBestSubHaplotypeFinder.INSTANCE;
|
finder = DeadEndKBestSubHaplotypeFinder.INSTANCE;
|
||||||
else {
|
else {
|
||||||
final Map<BaseEdge,KBestSubHaplotypeFinder> undeadChildren = createChildrenFinders(outgoingEdges);
|
final Map<BaseEdge,KBestSubHaplotypeFinder> undeadChildren = createChildrenFinders(outgoingEdges);
|
||||||
node = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
|
finder = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
|
||||||
new RecursiveSubHaplotypeFinder(source,undeadChildren);
|
new RecursiveSubHaplotypeFinder(graph,vertex,undeadChildren);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
finderByVertex.put(source, node);
|
finderByVertex.put(vertex, finder);
|
||||||
}
|
}
|
||||||
return node;
|
return finder;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -340,7 +345,7 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
||||||
* @return never {@code null}, perhaps an empty map if there is no children with valid paths to any sink for this
|
* @return never {@code null}, perhaps an empty map if there is no children with valid paths to any sink for this
|
||||||
* finder.
|
* finder.
|
||||||
*/
|
*/
|
||||||
private Map<BaseEdge, KBestSubHaplotypeFinder> createChildrenFinders(Set<BaseEdge> baseEdges) {
|
private Map<BaseEdge, KBestSubHaplotypeFinder> createChildrenFinders(final Set<BaseEdge> baseEdges) {
|
||||||
final Map<BaseEdge,KBestSubHaplotypeFinder> result = new LinkedHashMap<>(baseEdges.size());
|
final Map<BaseEdge,KBestSubHaplotypeFinder> result = new LinkedHashMap<>(baseEdges.size());
|
||||||
for (final BaseEdge edge : baseEdges) {
|
for (final BaseEdge edge : baseEdges) {
|
||||||
final KBestSubHaplotypeFinder targetFinder = createVertexFinder(graph.getEdgeTarget(edge));
|
final KBestSubHaplotypeFinder targetFinder = createVertexFinder(graph.getEdgeTarget(edge));
|
||||||
|
|
@ -349,4 +354,156 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print a DOT representation of search graph.
|
||||||
|
*
|
||||||
|
* @param out character stream printer where to print the DOT representation to.
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException if {@code out} is {@code null}.
|
||||||
|
*/
|
||||||
|
public void printDOT(final PrintWriter out) {
|
||||||
|
if (out == null)
|
||||||
|
throw new IllegalArgumentException("the out writer cannot be null");
|
||||||
|
out.println("digraph {");
|
||||||
|
out.println(" rankdir = LR");
|
||||||
|
out.println(" node [shape=box, margin=0.01]");
|
||||||
|
out.println(" subgraph cluster_dummy { style = invis; x [label=\"\",shape=none,margin=0] }");
|
||||||
|
final StringBuilder referenceCluster = new StringBuilder(1000);
|
||||||
|
|
||||||
|
referenceCluster.append(" subgraph cluster_ref {\n");
|
||||||
|
referenceCluster.append(" node [penwidth=2]\n");
|
||||||
|
for (final KBestSubHaplotypeFinder finder : finderByVertex.values() ) {
|
||||||
|
final String id = finder.id();
|
||||||
|
final String line = String.format(" %s [label=<%s>]",id,finder.label());
|
||||||
|
if (finder.isReference())
|
||||||
|
referenceCluster.append(" ").append(line).append('\n');
|
||||||
|
else
|
||||||
|
out.println(line);
|
||||||
|
}
|
||||||
|
referenceCluster.append(" }");
|
||||||
|
out.println(referenceCluster.toString());
|
||||||
|
|
||||||
|
for (final KBestSubHaplotypeFinder finder : finderByVertex.values())
|
||||||
|
for (final Pair<? extends KBestSubHaplotypeFinder,String> subFinderLabel : finder.subFinderLabels()) {
|
||||||
|
final KBestSubHaplotypeFinder subFinder = subFinderLabel.getFirst();
|
||||||
|
|
||||||
|
final String edgeLabel = subFinderLabel.getSecond();
|
||||||
|
out.println(String.format(" %s -> %s [label=%s]",finder.id(),subFinder.id(),edgeLabel));
|
||||||
|
}
|
||||||
|
out.println("}");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print a DOT representation of search graph.
|
||||||
|
*
|
||||||
|
* @param file file where to print the DOT representation to.
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException if {@code file} is {@code null}.
|
||||||
|
* @throws FileNotFoundException if {@code file} cannot be created or written.
|
||||||
|
* @throws IllegalStateException if there was some trouble when writing the DOT representation.
|
||||||
|
*/
|
||||||
|
public void printDOT(final File file) throws FileNotFoundException {
|
||||||
|
if (file == null)
|
||||||
|
throw new IllegalArgumentException("the output file cannot be null");
|
||||||
|
final PrintWriter out = new PrintWriter(file);
|
||||||
|
printDOT(out);
|
||||||
|
if (out.checkError())
|
||||||
|
throw new IllegalStateException("error occurred while writing k-best haplotype search graph into file '"
|
||||||
|
+ file.getAbsolutePath() + "'");
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print a DOT representation of search graph.
|
||||||
|
*
|
||||||
|
* @param fileName name of the file where to print the DOT representation to.
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException if {@code fileName} is {@code null}.
|
||||||
|
* @throws FileNotFoundException if the file named {@code fileName} cannot be created or written.
|
||||||
|
* @throws IllegalStateException if there was some trouble when writing the DOT representation.
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unused") // Available for debugging purposes.
|
||||||
|
public void printDOTFile(final String fileName) throws FileNotFoundException {
|
||||||
|
printDOT(new File(fileName));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the score of a give sequence of bases
|
||||||
|
*
|
||||||
|
* @param bases the base sequence.
|
||||||
|
*
|
||||||
|
* @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
|
||||||
|
* throw this finder.
|
||||||
|
*/
|
||||||
|
public double score(final byte[] bases) {
|
||||||
|
return topFinder.score(bases,0,bases.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the score of a give sequence of bases
|
||||||
|
*
|
||||||
|
* @param haplotype the haplotype.
|
||||||
|
*
|
||||||
|
* @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
|
||||||
|
* throw this finder.
|
||||||
|
*/
|
||||||
|
public double score(final Haplotype haplotype) {
|
||||||
|
return score(haplotype.getBases());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a unique list of haplotypes solutions.
|
||||||
|
* <p>
|
||||||
|
* The result will not contain more than one haplotype with the same base sequence. The solution of the best
|
||||||
|
* score is returned.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* This makes sense when there are more than one possible path through the graph to create the same haplotype.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The resulting list is sorted by the score with more likely haplotype search results first.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param maxSize maximum number of unique results to return.
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException if {@code maxSize} is negative.
|
||||||
|
*
|
||||||
|
* @return never {@code null}, perhaps an empty list.
|
||||||
|
*/
|
||||||
|
public List<KBestHaplotype> unique(final int maxSize) {
|
||||||
|
if (maxSize < 0) throw new IllegalArgumentException("maxSize cannot be negative");
|
||||||
|
final int requiredCapacity = Math.min(maxSize,size());
|
||||||
|
final Set<Haplotype> haplotypes = new HashSet<>(requiredCapacity);
|
||||||
|
int resultSize = 0;
|
||||||
|
final List<KBestHaplotype> result = new ArrayList<>(requiredCapacity);
|
||||||
|
for (final KBestHaplotype kbh : this) {
|
||||||
|
if (haplotypes.add(kbh.haplotype())) {
|
||||||
|
result.add(kbh);
|
||||||
|
if (resultSize == maxSize) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a unique list of haplotypes solutions.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The result will not contain more than one haplotype with the same base sequence. The solution of the best
|
||||||
|
* score is returned.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* This makes sense when there are more than one possible path through the graph to create the same haplotype.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The resulting list is sorted by the score with more likely haplotype search results first.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @return never {@code null}, perhaps an empty list.
|
||||||
|
*/
|
||||||
|
public List<KBestHaplotype> unique() {
|
||||||
|
return unique(size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,10 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Common interface for K-Best sub-haplotype finders.
|
* Common interface for K-Best sub-haplotype finders.
|
||||||
*
|
*
|
||||||
|
|
@ -52,6 +56,29 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
*/
|
*/
|
||||||
interface KBestSubHaplotypeFinder {
|
interface KBestSubHaplotypeFinder {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a unique id for this sub-haplotype finder, to be used when outputting diagrams.
|
||||||
|
*
|
||||||
|
* @return never {@code null}.
|
||||||
|
*/
|
||||||
|
public String id();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a label with human readable representation of this finder.
|
||||||
|
*
|
||||||
|
* <p>This is used when generating a diagram to illustrate the search space and costs</p>
|
||||||
|
*
|
||||||
|
* @return never {@code null}.
|
||||||
|
*/
|
||||||
|
public String label();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the set of sub-finders of this finder, each paired with a label for its connection to this finder.
|
||||||
|
*
|
||||||
|
* <p>The label is used when generating a diagram to illustrate the search space and costs</p>
|
||||||
|
*/
|
||||||
|
public Set<Pair<? extends KBestSubHaplotypeFinder,String>> subFinderLabels();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the total number of possible sub-haplotypes.
|
* Returns the total number of possible sub-haplotypes.
|
||||||
* @return 0 or greater.
|
* @return 0 or greater.
|
||||||
|
|
@ -67,5 +94,22 @@ interface KBestSubHaplotypeFinder {
|
||||||
*
|
*
|
||||||
* @return never {@code null}.
|
* @return never {@code null}.
|
||||||
*/
|
*/
|
||||||
public abstract KBestHaplotype getKBest(int k);
|
public KBestHaplotype getKBest(int k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether the top vertex for this finder is a reference haplotype vertex.
|
||||||
|
*
|
||||||
|
* @return {@code true} iff the top vertex for this finder is a reference vertex.
|
||||||
|
*/
|
||||||
|
public boolean isReference();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the score of a sequence of bases.
|
||||||
|
*
|
||||||
|
* @param bases array containing the query base sequence.
|
||||||
|
* @param offset first position of the query base sequence in {@code bases} .
|
||||||
|
* @param length length of the query base sequence.
|
||||||
|
* @return {@link Double#NaN} if there is no score for this sequence, otherwise a valid score value.
|
||||||
|
*/
|
||||||
|
public double score(byte[] bases, int offset, int length);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -49,20 +49,24 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
import java.util.PriorityQueue;
|
import java.util.PriorityQueue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* edge class for connecting nodes in the graph that tracks some per-sample information
|
* Edge class for connecting nodes in the graph that tracks some per-sample information.
|
||||||
*
|
* <p>
|
||||||
* This class extends BaseEdge with the additional functionality of tracking the maximum
|
* This class extends BaseEdge with the additional functionality of tracking the maximum
|
||||||
* multiplicity seen within any single sample. The workflow for using this class is:
|
* multiplicity seen within any single sample. The workflow for using this class is:
|
||||||
*
|
* </p>
|
||||||
* MultiSampleEdge e = new MultiSampleEdge(ref, 1)
|
* <pre>
|
||||||
* e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
|
* {@code
|
||||||
* e.getPruningMultiplicity() // = 1
|
* MultiSampleEdge e = new MultiSampleEdge(ref, 1)
|
||||||
* e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
|
* e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
|
||||||
* e.getPruningMultiplicity() // = 2
|
* e.getPruningMultiplicity() // = 1
|
||||||
* e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
|
* e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
|
||||||
* e.getPruningMultiplicity() // = 2
|
* e.getPruningMultiplicity() // = 2
|
||||||
* e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
|
* e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
|
||||||
* e.getPruningMultiplicity() // = 3
|
* e.getPruningMultiplicity() // = 2
|
||||||
|
* e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
|
||||||
|
* e.getPruningMultiplicity() // = 3
|
||||||
|
* }
|
||||||
|
* </pre>
|
||||||
*/
|
*/
|
||||||
public class MultiSampleEdge extends BaseEdge {
|
public class MultiSampleEdge extends BaseEdge {
|
||||||
private int currentSingleSampleMultiplicity;
|
private int currentSingleSampleMultiplicity;
|
||||||
|
|
|
||||||
|
|
@ -45,9 +45,10 @@
|
||||||
*/
|
*/
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import java.util.Collection;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import java.util.Map;
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* General recursive sub-haplotype finder.
|
* General recursive sub-haplotype finder.
|
||||||
|
|
@ -67,7 +68,11 @@ import java.util.Map;
|
||||||
*
|
*
|
||||||
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
||||||
*/
|
*/
|
||||||
class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder<RecursiveSubHaplotypeFinder.EdgeSubHaplotypeFinder> {
|
||||||
|
|
||||||
|
|
||||||
|
private final SeqVertex vertex;
|
||||||
|
private final boolean isReference;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a recursive sub-haplotype finder given the target graph, first vertex and all possible outgoing edges
|
* Creates a recursive sub-haplotype finder given the target graph, first vertex and all possible outgoing edges
|
||||||
|
|
@ -80,20 +85,83 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
||||||
* @param vertex first vertex for all sub-haplotype solutions provided by this finder
|
* @param vertex first vertex for all sub-haplotype solutions provided by this finder
|
||||||
* @param children map from outgoing edge to the corresponding sub-sub-haplotype finder.
|
* @param children map from outgoing edge to the corresponding sub-sub-haplotype finder.
|
||||||
*/
|
*/
|
||||||
public RecursiveSubHaplotypeFinder(final SeqVertex vertex,
|
public RecursiveSubHaplotypeFinder(final SeqGraph graph, final SeqVertex vertex,
|
||||||
final Map<BaseEdge, KBestSubHaplotypeFinder> children) {
|
final Map<BaseEdge, KBestSubHaplotypeFinder> children) {
|
||||||
super(createChildFinderCollection(vertex, children));
|
super(createChildFinderCollection(vertex, children));
|
||||||
|
this.vertex = vertex;
|
||||||
|
this.isReference = graph.isReferenceNode(vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Collection<EdgeSubHaplotypeFinder> createChildFinderCollection(final SeqVertex vertex, final Map<BaseEdge,KBestSubHaplotypeFinder> finders) {
|
/**
|
||||||
|
* Wraps the descendant vertices finders in order to take advantage of the {@link AggregatedSubHaplotypeFinder}
|
||||||
|
* common code.
|
||||||
|
* <p>
|
||||||
|
* Automatically calibrates the edge score (cost) so that it takes into account the total across all edges.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @param vertex the parent vertex.
|
||||||
|
* @param finders the child vertices indexed by the connecting edge.
|
||||||
|
* @return never {@code null} but potentially an empty collection if there is no child returning any sub-haplotype
|
||||||
|
* solution.
|
||||||
|
*/
|
||||||
|
private static Collection<EdgeSubHaplotypeFinder> createChildFinderCollection(final SeqVertex vertex,
|
||||||
|
final Map<BaseEdge,KBestSubHaplotypeFinder> finders) {
|
||||||
if (finders == null) throw new IllegalArgumentException("the edge to child map cannot be null");
|
if (finders == null) throw new IllegalArgumentException("the edge to child map cannot be null");
|
||||||
final Collection<EdgeSubHaplotypeFinder> result = new ArrayList<>(finders.size());
|
final ArrayList<EdgeSubHaplotypeFinder> result = new ArrayList<>(finders.size());
|
||||||
for (final Map.Entry<BaseEdge,KBestSubHaplotypeFinder> e : finders.entrySet())
|
for (final Map.Entry<BaseEdge,KBestSubHaplotypeFinder> e : finders.entrySet()) {
|
||||||
result.add(new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue()));
|
final EdgeSubHaplotypeFinder subFinder = new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue());
|
||||||
|
if (subFinder.getCount() == 0) continue;
|
||||||
|
result.add(subFinder);
|
||||||
|
}
|
||||||
|
if (result.size() == 0)
|
||||||
|
return Collections.emptySet();
|
||||||
|
else if (result.size() == 1) // no calibration needed, by default edgeScore is 0.
|
||||||
|
return Collections.singleton(result.get(0));
|
||||||
|
else {
|
||||||
|
double totalEdgeMultiplicityAcrossEdges = 0;
|
||||||
|
for (final EdgeSubHaplotypeFinder finder : result)
|
||||||
|
totalEdgeMultiplicityAcrossEdges += Math.max(0.5,finder.edge.getMultiplicity());
|
||||||
|
final double log10TotalEdgeMultiplicityAcrossEdges = Math.log10(totalEdgeMultiplicityAcrossEdges);
|
||||||
|
for (final EdgeSubHaplotypeFinder finder : result)
|
||||||
|
finder.calibrateEdgeScore(log10TotalEdgeMultiplicityAcrossEdges);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isReference() {
|
||||||
|
return isReference;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String label() {
|
||||||
|
return vertex.getSequenceString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||||
|
final Set<Pair<? extends KBestSubHaplotypeFinder,String>> result = new LinkedHashSet<>(subFinders.size());
|
||||||
|
for (final EdgeSubHaplotypeFinder subFinder : subFinders)
|
||||||
|
result.add(new Pair<>(subFinder,simplifyZeros(String.format("%.4f", subFinder.edgeScore))));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
/**
|
||||||
|
* Removes trailing zero decimal positions from edge labels.
|
||||||
|
*
|
||||||
|
* @param edgeLabel the original label to reformat.
|
||||||
|
* @return never {@code null}, the reformatted label.
|
||||||
|
*/
|
||||||
|
private String simplifyZeros(final String edgeLabel) {
|
||||||
|
if (edgeLabel.equals("0.000") || edgeLabel.equals("-0.000") )
|
||||||
|
return "0.";
|
||||||
|
int i = edgeLabel.length() - 1;
|
||||||
|
while (edgeLabel.charAt(i) == '0')
|
||||||
|
i--;
|
||||||
|
return (i == edgeLabel.length() - 1) ? edgeLabel : edgeLabel.substring(0,i);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||||
|
|
||||||
private final KBestSubHaplotypeFinder childFinder;
|
private final KBestSubHaplotypeFinder childFinder;
|
||||||
|
|
||||||
|
|
@ -101,10 +169,32 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
||||||
|
|
||||||
private final BaseEdge edge;
|
private final BaseEdge edge;
|
||||||
|
|
||||||
|
private double edgeScore = 0;
|
||||||
|
|
||||||
private EdgeSubHaplotypeFinder(final SeqVertex vertex, final BaseEdge edge, final KBestSubHaplotypeFinder childFinder) {
|
private EdgeSubHaplotypeFinder(final SeqVertex vertex, final BaseEdge edge, final KBestSubHaplotypeFinder childFinder) {
|
||||||
this.childFinder = childFinder;
|
this.childFinder = childFinder;
|
||||||
this.edge = edge;
|
this.edge = edge;
|
||||||
this.vertex = vertex;
|
this.vertex = vertex;
|
||||||
|
this.edgeScore = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void calibrateEdgeScore(final double log10TotalMultiplicityAcrossOutgoingEdges) {
|
||||||
|
edgeScore = Math.log10(Math.max(edge.getMultiplicity(),0.5)) - log10TotalMultiplicityAcrossOutgoingEdges;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String id() {
|
||||||
|
return childFinder.id();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String label() {
|
||||||
|
return childFinder.label();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||||
|
return childFinder.subFinderLabels();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -114,8 +204,31 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public KBestHaplotype getKBest(int k) {
|
public KBestHaplotype getKBest(int k) {
|
||||||
return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k));
|
return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k),edgeScore);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isReference() {
|
||||||
|
return childFinder.isReference();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double score(final byte[] bases, final int offset, final int length) {
|
||||||
|
if (length == 0)
|
||||||
|
return 0;
|
||||||
|
final byte[] vertexSequence = vertex.getSequence();
|
||||||
|
if (length < vertexSequence.length) // query is not long enough to have any score.
|
||||||
|
return Double.NaN;
|
||||||
|
else if (!Utils.equalRange(vertexSequence,0,bases,offset,vertexSequence.length))
|
||||||
|
return Double.NaN;
|
||||||
|
else
|
||||||
|
return edgeScore + childFinder.score(bases,offset + vertexSequence.length,length - vertexSequence.length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String id() {
|
||||||
|
return "v" + vertex.getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -129,13 +242,14 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
||||||
*/
|
*/
|
||||||
private static class ChildKBestSubHaplotype extends KBestHaplotype {
|
private static class ChildKBestSubHaplotype extends KBestHaplotype {
|
||||||
|
|
||||||
private final int score;
|
private final double score;
|
||||||
private final KBestHaplotype child;
|
private final KBestHaplotype child;
|
||||||
private final SeqVertex vertex;
|
private final SeqVertex vertex;
|
||||||
private final boolean isReference;
|
private final boolean isReference;
|
||||||
|
|
||||||
public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child) {
|
|
||||||
this.score = edge.getMultiplicity() + child.score();
|
public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child, final double edgeScore) {
|
||||||
|
this.score = edgeScore + child.score();
|
||||||
this.vertex = vertex;
|
this.vertex = vertex;
|
||||||
this.child = child;
|
this.child = child;
|
||||||
this.isReference = edge.isRef() && child.isReference();
|
this.isReference = edge.isRef() && child.isReference();
|
||||||
|
|
@ -147,7 +261,7 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int score() {
|
public double score() {
|
||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -291,16 +291,9 @@ public class SeqGraph extends BaseGraph<SeqVertex, BaseEdge> {
|
||||||
final SeqVertex addedVertex = mergeLinearChainVertices(linearChain);
|
final SeqVertex addedVertex = mergeLinearChainVertices(linearChain);
|
||||||
addVertex(addedVertex);
|
addVertex(addedVertex);
|
||||||
|
|
||||||
final Set<BaseEdge> inEdges = incomingEdgesOf(first);
|
|
||||||
final Set<BaseEdge> outEdges = outgoingEdgesOf(last);
|
|
||||||
|
|
||||||
final int nEdges = inEdges.size() + outEdges.size();
|
|
||||||
int sharedWeightAmongEdges = nEdges == 0 ? 0 : sumEdgeWeightAlongChain(linearChain) / nEdges;
|
|
||||||
final BaseEdge inc = new BaseEdge(false, sharedWeightAmongEdges); // template to make .add function call easy
|
|
||||||
|
|
||||||
// update the incoming and outgoing edges to point to the new vertex
|
// update the incoming and outgoing edges to point to the new vertex
|
||||||
for( final BaseEdge edge : outEdges ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy().add(inc)); }
|
for( final BaseEdge edge : outgoingEdgesOf(last) ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy()); }
|
||||||
for( final BaseEdge edge : inEdges ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy().add(inc)); }
|
for( final BaseEdge edge : incomingEdgesOf(first) ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy()); }
|
||||||
|
|
||||||
removeAllVertices(linearChain);
|
removeAllVertices(linearChain);
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -313,29 +306,6 @@ public class SeqGraph extends BaseGraph<SeqVertex, BaseEdge> {
|
||||||
return new SeqVertex( seqsCat );
|
return new SeqVertex( seqsCat );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the sum of the edge weights on a linear chain of at least 2 elements
|
|
||||||
*
|
|
||||||
* @param chain a linear chain of vertices with at least 2 vertices
|
|
||||||
* @return the sum of the multiplicities along all edges connecting vertices within the chain
|
|
||||||
*/
|
|
||||||
@Requires({"chain != null", "chain.size() >= 2"})
|
|
||||||
private int sumEdgeWeightAlongChain(final LinkedList<SeqVertex> chain) {
|
|
||||||
int sum = 0;
|
|
||||||
SeqVertex prev = null;
|
|
||||||
|
|
||||||
for ( final SeqVertex v : chain ) {
|
|
||||||
if ( prev != null ) {
|
|
||||||
final BaseEdge e = getEdge(prev, v);
|
|
||||||
if ( e == null ) throw new IllegalStateException("Something wrong with the linear chain, got a null edge between " + prev + " and " + v);
|
|
||||||
sum += e.getMultiplicity();
|
|
||||||
}
|
|
||||||
prev = v;
|
|
||||||
}
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for transformation operations that need to iterate over proposed vertices, where
|
* Base class for transformation operations that need to iterate over proposed vertices, where
|
||||||
* each proposed vertex is a seed vertex for a potential transformation.
|
* each proposed vertex is a seed vertex for a potential transformation.
|
||||||
|
|
|
||||||
|
|
@ -247,12 +247,12 @@ public class SharedVertexSequenceSplitter {
|
||||||
|
|
||||||
if ( needPrefixNode ) {
|
if ( needPrefixNode ) {
|
||||||
outer.addVertex(prefixV);
|
outer.addVertex(prefixV);
|
||||||
if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 0));
|
if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( needSuffixNode ) {
|
if ( needSuffixNode ) {
|
||||||
outer.addVertex(suffixV);
|
outer.addVertex(suffixV);
|
||||||
if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 0));
|
if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( topForConnect != null ) {
|
if ( topForConnect != null ) {
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer;
|
||||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
|
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.jgrapht.alg.CycleDetector;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -88,8 +87,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
private final boolean debugGraphTransformations;
|
||||||
final boolean debugGraphTransformations;
|
|
||||||
final byte minBaseQualityToUseInAssembly;
|
final byte minBaseQualityToUseInAssembly;
|
||||||
|
|
||||||
protected boolean increaseCountsBackwards = true;
|
protected boolean increaseCountsBackwards = true;
|
||||||
|
|
@ -319,13 +317,6 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
||||||
removeAllVertices(verticesToRemove);
|
removeAllVertices(verticesToRemove);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @return true if the graph has cycles, false otherwise
|
|
||||||
*/
|
|
||||||
public boolean hasCycles() {
|
|
||||||
return new CycleDetector<>(this).detectCycles();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does the graph not have enough complexity? We define low complexity as a situation where the number
|
* Does the graph not have enough complexity? We define low complexity as a situation where the number
|
||||||
* of non-unique kmers is more than 20% of the total number of kmers.
|
* of non-unique kmers is more than 20% of the total number of kmers.
|
||||||
|
|
@ -419,39 +410,10 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
||||||
return counter.getKmersWithCountsAtLeast(2);
|
return counter.getKmersWithCountsAtLeast(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
@Override
|
||||||
* Convert this kmer graph to a simple sequence graph.
|
|
||||||
*
|
|
||||||
* Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
|
|
||||||
* graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
|
|
||||||
*
|
|
||||||
* @return a newly allocated SequenceGraph
|
|
||||||
*/
|
|
||||||
// TODO -- should override base class method
|
|
||||||
public SeqGraph convertToSequenceGraph() {
|
public SeqGraph convertToSequenceGraph() {
|
||||||
buildGraphIfNecessary();
|
buildGraphIfNecessary();
|
||||||
|
return super.convertToSequenceGraph();
|
||||||
final SeqGraph seqGraph = new SeqGraph(kmerSize);
|
|
||||||
final Map<MultiDeBruijnVertex, SeqVertex> vertexMap = new HashMap<>();
|
|
||||||
|
|
||||||
|
|
||||||
// create all of the equivalent seq graph vertices
|
|
||||||
for ( final MultiDeBruijnVertex dv : vertexSet() ) {
|
|
||||||
final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
|
|
||||||
sv.setAdditionalInfo(dv.additionalInfo());
|
|
||||||
vertexMap.put(dv, sv);
|
|
||||||
seqGraph.addVertex(sv);
|
|
||||||
}
|
|
||||||
|
|
||||||
// walk through the nodes and connect them to their equivalent seq vertices
|
|
||||||
for( final MultiSampleEdge e : edgeSet() ) {
|
|
||||||
final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
|
|
||||||
final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
|
|
||||||
//logger.info("Adding edge " + seqInV + " -> " + seqOutV);
|
|
||||||
seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
|
|
||||||
}
|
|
||||||
|
|
||||||
return seqGraph;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void increaseCountsInMatchedKmers(final SequenceForKmers seqForKmers,
|
private void increaseCountsInMatchedKmers(final SequenceForKmers seqForKmers,
|
||||||
|
|
@ -749,15 +711,15 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String pathElementId(final String element) {
|
private static String pathElementId(final String element) {
|
||||||
final int parentesysPos = element.indexOf('(');
|
final int openBracketPosition = element.indexOf('(');
|
||||||
|
|
||||||
if (parentesysPos == -1)
|
if (openBracketPosition == -1)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
final int closeParentesysPos = element.lastIndexOf(')');
|
final int closeBracketPosition = element.lastIndexOf(')');
|
||||||
if (closeParentesysPos == -1)
|
if (closeBracketPosition == -1)
|
||||||
throw new IllegalArgumentException("non-closed id parantesys found in element: " + element);
|
throw new IllegalArgumentException("non-closed id parantesys found in element: " + element);
|
||||||
final String result = element.substring(parentesysPos + 1,closeParentesysPos).trim();
|
final String result = element.substring(openBracketPosition + 1,closeBracketPosition).trim();
|
||||||
if (result.isEmpty())
|
if (result.isEmpty())
|
||||||
throw new IllegalArgumentException("empty id found in element: " + element);
|
throw new IllegalArgumentException("empty id found in element: " + element);
|
||||||
return result;
|
return result;
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||||
"f50e0b35e2240b19b1b8b6dfa0cf9796");
|
"5ac3bfe1da1d411b52a98ef3debbd318");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
|
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
|
||||||
|
|
@ -106,7 +106,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
||||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
||||||
"21e521d51b826450d348e5201684ffe4");
|
"61972c7c0d378e756f3b4d99aed9d0cf");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -68,8 +68,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
|
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
|
||||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7c16aa8e35de9f418533efac3bae6551"});
|
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "96fea2caf0a40df3feb268e8b14da670"});
|
||||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e1e193d70187774f9740d475e0f1cc1"});
|
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"});
|
||||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
|
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
|
||||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
|
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
|
||||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
|
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
|
||||||
|
|
|
||||||
|
|
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestDBSNPAnnotationWGS() {
|
public void HCTestDBSNPAnnotationWGS() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||||
Arrays.asList("0998be22d7af4372247f5a0338f9446b"));
|
Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2"));
|
||||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||||
Arrays.asList("1aeed297a3cb41940d83eac499a2ce07"));
|
Arrays.asList("eda8f91091fe462205d687ec49fc61e7"));
|
||||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestAggressivePcrIndelModelWGS() {
|
public void HCTestAggressivePcrIndelModelWGS() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||||
Arrays.asList("f426f4c2986e1dea8f3f55951ef8e013"));
|
Arrays.asList("73c52372a1a80f052ea2b728ee17bf22"));
|
||||||
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
public void HCTestConservativePcrIndelModelWGS() {
|
public void HCTestConservativePcrIndelModelWGS() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||||
Arrays.asList("dcb38cb9280f2c3059a09d323db1c633"));
|
Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b"));
|
||||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -298,4 +298,25 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testGraphBasedNoSuchEdgeBugFix", spec);
|
executeTest("testGraphBasedNoSuchEdgeBugFix", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
|
||||||
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||||
|
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list",
|
||||||
|
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e2e6647f7c96e91aeead7301017dc800"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() {
|
||||||
|
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||||
|
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
|
||||||
|
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||||
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
|
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
|
@ -61,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
|
||||||
List<Object[]> tests = new ArrayList<>();
|
List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||||
tests.add(new Object[]{nct, "1f463bf3a06c401006858bc446ecea54"});
|
tests.add(new Object[]{nct, "fd9324a574f9204f7308fc1af422fdcc"});
|
||||||
}
|
}
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
|
|
|
||||||
|
|
@ -52,10 +52,11 @@ import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class CommonSuffixMergerUnitTest extends BaseTest {
|
public class CommonSuffixMergerUnitTest extends BaseTest {
|
||||||
private final static boolean PRINT_GRAPHS = true;
|
private final static boolean PRINT_GRAPHS = false;
|
||||||
|
|
||||||
@DataProvider(name = "CompleteCycleData")
|
@DataProvider(name = "CompleteCycleData")
|
||||||
public Object[][] makeCompleteCycleData() {
|
public Object[][] makeCompleteCycleData() {
|
||||||
|
|
@ -134,11 +135,35 @@ public class CommonSuffixMergerUnitTest extends BaseTest {
|
||||||
return toUse.toArray(new Object[][]{});
|
return toUse.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares KBestHaplotype solutions, first by the haplotype base sequence and then by their score.
|
||||||
|
*/
|
||||||
|
private static final Comparator<KBestHaplotype> KBESTHAPLOTYPE_COMPARATOR = new Comparator<KBestHaplotype>() {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares KBestHaplotype solutions, first by the haplotype base sequence and then by their score.
|
||||||
|
*
|
||||||
|
* @return {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int compare(final KBestHaplotype o1,final KBestHaplotype o2) {
|
||||||
|
final int baseCmp = o1.haplotype().getBaseString().compareTo(o2.haplotype().getBaseString());
|
||||||
|
if (baseCmp != 0)
|
||||||
|
return baseCmp;
|
||||||
|
return - Double.compare(o1.score(),o2.score());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) {
|
public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) {
|
||||||
|
final KBestHaplotypeFinder originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks());
|
||||||
|
final KBestHaplotypeFinder actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks());
|
||||||
|
final List<KBestHaplotype> sortedOriginalKBestHaplotypes = new ArrayList<>(originalKBestHaplotypes);
|
||||||
|
Collections.sort(sortedOriginalKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR);
|
||||||
|
final List<KBestHaplotype> sortedActualKBestHaplotypes = new ArrayList<>(actualKBestHaplotypes);
|
||||||
|
Collections.sort(sortedActualKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR);
|
||||||
try {
|
try {
|
||||||
final Set<String> haplotypes = new HashSet<String>();
|
final Set<String> haplotypes = new HashSet<String>();
|
||||||
final List<KBestHaplotype> originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks());
|
|
||||||
final List<KBestHaplotype> actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks());
|
|
||||||
|
|
||||||
for (final KBestHaplotype kbh : originalKBestHaplotypes)
|
for (final KBestHaplotype kbh : originalKBestHaplotypes)
|
||||||
haplotypes.add(new String(kbh.bases()));
|
haplotypes.add(new String(kbh.bases()));
|
||||||
|
|
@ -148,14 +173,16 @@ public class CommonSuffixMergerUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
|
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( actualKBestHaplotypes.size() == originalKBestHaplotypes.size() ) {
|
Assert.assertEquals(sortedActualKBestHaplotypes,sortedOriginalKBestHaplotypes);
|
||||||
for ( int i = 0; i < originalKBestHaplotypes.size(); i++ ) {
|
|
||||||
Assert.assertTrue(actualKBestHaplotypes.get(i).haplotype().getBaseString().equals(originalKBestHaplotypes.get(i).haplotype().getBaseString()), "Paths not equal " + actualKBestHaplotypes.get(i).haplotype() + " vs. original " + originalKBestHaplotypes.get(i).haplotype());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch ( AssertionError e ) {
|
} catch ( AssertionError e ) {
|
||||||
if ( PRINT_GRAPHS ) original.printGraph(new File(String.format("%s.original.dot", name, actual.vertexSet().size())), 0);
|
if ( PRINT_GRAPHS ) original.printGraph(new File(String.format("%s.original.dot", name, actual.vertexSet().size())), 0);
|
||||||
if ( PRINT_GRAPHS ) actual.printGraph(new File(String.format("%s.actual.dot", name, actual.vertexSet().size())), 0);
|
if ( PRINT_GRAPHS ) actual.printGraph(new File(String.format("%s.actual.dot", name, actual.vertexSet().size())), 0);
|
||||||
|
try {
|
||||||
|
if ( PRINT_GRAPHS ) originalKBestHaplotypes.printDOTFile(String.format("%s.original.finder.dot",name));
|
||||||
|
if ( PRINT_GRAPHS ) actualKBestHaplotypes.printDOTFile(String.format("%s.actual.finder.dot",name));
|
||||||
|
} catch (IOException e2) {
|
||||||
|
// do nothing.
|
||||||
|
}
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -65,7 +65,6 @@ import java.util.*;
|
||||||
* User: rpoplin
|
* User: rpoplin
|
||||||
* Date: 1/31/13
|
* Date: 1/31/13
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class KBestHaplotypeFinderUnitTest extends BaseTest {
|
public class KBestHaplotypeFinderUnitTest extends BaseTest {
|
||||||
|
|
||||||
@DataProvider(name = "BasicPathFindingData")
|
@DataProvider(name = "BasicPathFindingData")
|
||||||
|
|
@ -113,11 +112,11 @@ public class KBestHaplotypeFinderUnitTest extends BaseTest {
|
||||||
final int expectedNumOfPaths = nStartNodes * nBranchesPerBubble * nEndNodes;
|
final int expectedNumOfPaths = nStartNodes * nBranchesPerBubble * nEndNodes;
|
||||||
Assert.assertEquals(paths.size(), expectedNumOfPaths, "Didn't find the expected number of paths");
|
Assert.assertEquals(paths.size(), expectedNumOfPaths, "Didn't find the expected number of paths");
|
||||||
|
|
||||||
int lastScore = Integer.MAX_VALUE;
|
double lastScore = 0;
|
||||||
for ( final KBestHaplotype kbh : paths ) {
|
for ( final KBestHaplotype kbh : paths ) {
|
||||||
final Path<SeqVertex,BaseEdge> path = kbh.path();
|
final Path<SeqVertex,BaseEdge> path = kbh.path();
|
||||||
Assert.assertTrue(path.getScore() <= lastScore, "Paths out of order. Path " + path + " has score above previous " + lastScore);
|
Assert.assertTrue(kbh.score() <= lastScore, "Paths out of order. Path " + path + " has score " + path.getScore() + " above previous " + lastScore);
|
||||||
lastScore = path.getScore();
|
lastScore = kbh.score();
|
||||||
}
|
}
|
||||||
|
|
||||||
// get the best path, and make sure it's the same as our optimal path overall
|
// get the best path, and make sure it's the same as our optimal path overall
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
@ -226,28 +227,34 @@ public class SharedVertexSequenceSplitterUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
final Set<String> haplotypes = new HashSet<>();
|
final Set<String> haplotypes = new HashSet<>();
|
||||||
final List<KBestHaplotype> originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks());
|
final KBestHaplotypeFinder originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks());
|
||||||
for ( final KBestHaplotype path : originalPaths )
|
for ( final KBestHaplotype path : originalPaths )
|
||||||
haplotypes.add(new String(path.bases()));
|
haplotypes.add(new String(path.bases()));
|
||||||
|
|
||||||
final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, v);
|
final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, v);
|
||||||
splitter.split();
|
splitter.split();
|
||||||
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".original.dot"), 0);
|
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".original.dot"), 0);
|
||||||
if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + ".split.dot"), 0);
|
if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".split.dot"), 0);
|
||||||
splitter.updateGraph(top, bot);
|
splitter.updateGraph(top, bot);
|
||||||
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".updated.dot"), 0);
|
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".updated.dot"), 0);
|
||||||
|
|
||||||
final List<KBestHaplotype> splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks());
|
final KBestHaplotypeFinder splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks());
|
||||||
for ( final KBestHaplotype path : splitPaths ) {
|
for ( final KBestHaplotype path : splitPaths ) {
|
||||||
final String h = new String(path.bases());
|
final String h = new String(path.bases());
|
||||||
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
|
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( splitPaths.size() == originalPaths.size() ) {
|
|
||||||
for ( int i = 0; i < originalPaths.size(); i++ ) {
|
final List<byte[]> sortedOriginalPaths = new ArrayList<>(originalPaths.size());
|
||||||
Assert.assertTrue(splitPaths.get(i).path().equalScoreAndSequence(originalPaths.get(i).path()), "Paths not equal " + splitPaths.get(i) + " vs. original " + originalPaths.get(i));
|
for (final KBestHaplotype kbh : originalPaths.unique())
|
||||||
}
|
sortedOriginalPaths.add(kbh.bases());
|
||||||
}
|
Collections.sort(sortedOriginalPaths, BaseUtils.BASES_COMPARATOR);
|
||||||
|
final List<byte[]> sortedSplitPaths = new ArrayList<>(splitPaths.size());
|
||||||
|
for (final KBestHaplotype kbh : splitPaths.unique())
|
||||||
|
sortedSplitPaths.add(kbh.bases());
|
||||||
|
Collections.sort(sortedSplitPaths, BaseUtils.BASES_COMPARATOR);
|
||||||
|
|
||||||
|
Assert.assertEquals(sortedSplitPaths,sortedOriginalPaths,Utils.join("_", strings) + "_" + hasTop + "_" + hasBot);
|
||||||
}
|
}
|
||||||
|
|
||||||
@DataProvider(name = "MeetsMinSequenceData")
|
@DataProvider(name = "MeetsMinSequenceData")
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BaseUtils contains some basic utilities for manipulating nucleotides.
|
* BaseUtils contains some basic utilities for manipulating nucleotides.
|
||||||
|
|
@ -589,4 +590,26 @@ public class BaseUtils {
|
||||||
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@link Comparator} that sorts base arrays lexicographically.
|
||||||
|
*/
|
||||||
|
public static final Comparator<byte[]> BASES_COMPARATOR = new Comparator<byte[]> (){
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(final byte[] o1,final byte[] o2) {
|
||||||
|
final int minLength = Math.min(o1.length,o2.length);
|
||||||
|
for (int i = 0; i < minLength; i++) {
|
||||||
|
final int cmp = Byte.compare(o1[i],o2[i]);
|
||||||
|
if (cmp != 0) return cmp;
|
||||||
|
}
|
||||||
|
if (o1.length == o2.length)
|
||||||
|
return 0;
|
||||||
|
else if (o1.length == minLength)
|
||||||
|
return -1;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -852,4 +852,34 @@ public class Utils {
|
||||||
|
|
||||||
return lst;
|
return lst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares sections from two byte arrays to verify whether they contain the same values.
|
||||||
|
*
|
||||||
|
* @param left first array to compare.
|
||||||
|
* @param leftOffset first position of the first array to compare.
|
||||||
|
* @param right second array to compare.
|
||||||
|
* @param rightOffset first position of the second array to compare.
|
||||||
|
* @param length number of positions to compare.
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException if <ul>
|
||||||
|
* <li>either {@code left} or {@code right} is {@code null} or</li>
|
||||||
|
* <li>any combination of offset and length points outside either of the two arrays</li>
|
||||||
|
* </ul>
|
||||||
|
* @return {@code true} iff {@code length} is 0 or all the bytes in both ranges are pairwise equal.
|
||||||
|
*/
|
||||||
|
public static boolean equalRange(final byte[] left, final int leftOffset, byte[] right, final int rightOffset, final int length) {
|
||||||
|
if (left == null) throw new IllegalArgumentException("left cannot be null");
|
||||||
|
if (right == null) throw new IllegalArgumentException("right cannot be null");
|
||||||
|
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||||
|
if (leftOffset < 0) throw new IllegalArgumentException("left offset cannot be negative");
|
||||||
|
if (leftOffset + length > left.length) throw new IllegalArgumentException("length goes beyond end of left array");
|
||||||
|
if (rightOffset < 0) throw new IllegalArgumentException("right offset cannot be negative");
|
||||||
|
if (rightOffset + length > right.length) throw new IllegalArgumentException("length goes beyond end of right array");
|
||||||
|
|
||||||
|
for (int i = 0; i < length; i++)
|
||||||
|
if (left[leftOffset + i] != right[rightOffset + i])
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ public class Haplotype extends Allele {
|
||||||
private EventMap eventMap = null;
|
private EventMap eventMap = null;
|
||||||
private Cigar cigar;
|
private Cigar cigar;
|
||||||
private int alignmentStartHapwrtRef;
|
private int alignmentStartHapwrtRef;
|
||||||
private double score = 0;
|
private double score = Double.NaN;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main constructor
|
* Main constructor
|
||||||
|
|
@ -301,7 +301,7 @@ public class Haplotype extends Allele {
|
||||||
* @return a double, where higher values are better
|
* @return a double, where higher values are better
|
||||||
*/
|
*/
|
||||||
public double getScore() {
|
public double getScore() {
|
||||||
return this.isReference() ? Double.MAX_VALUE : score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -312,7 +312,7 @@ public class Haplotype extends Allele {
|
||||||
* @param score a double, where higher values are better
|
* @param score a double, where higher values are better
|
||||||
*/
|
*/
|
||||||
public void setScore(double score) {
|
public void setScore(double score) {
|
||||||
this.score = this.isReference() ? Double.MAX_VALUE : score;
|
this.score = score;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,7 @@ public abstract class BaseTest {
|
||||||
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
|
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
|
||||||
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
|
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
|
||||||
public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
|
public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
|
||||||
|
public static final String hg19RefereneWithChrPrefixInChromosomeNames = "/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta";
|
||||||
public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
|
public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
|
||||||
public static final String validationDataLocation = GATKDataLocation + "Validation_Data/";
|
public static final String validationDataLocation = GATKDataLocation + "Validation_Data/";
|
||||||
public static final String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/";
|
public static final String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/";
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,16 @@
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
|
||||||
public class BaseUtilsUnitTest extends BaseTest {
|
public class BaseUtilsUnitTest extends BaseTest {
|
||||||
|
|
@ -123,4 +130,50 @@ public class BaseUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
Assert.assertTrue(rcObs.equals(rcExp));
|
Assert.assertTrue(rcObs.equals(rcExp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider="baseComparatorData")
|
||||||
|
public void testBaseComparator(final Collection<byte[]> basesToSort) {
|
||||||
|
final ArrayList<byte[]> sorted = new ArrayList<>(basesToSort);
|
||||||
|
Collections.sort(sorted, BaseUtils.BASES_COMPARATOR);
|
||||||
|
for (int i = 0; i < sorted.size(); i++) {
|
||||||
|
Assert.assertEquals(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(i)),0);
|
||||||
|
final String iString = new String(sorted.get(i));
|
||||||
|
for (int j = i; j < sorted.size(); j++) {
|
||||||
|
final String jString = new String(sorted.get(j));
|
||||||
|
if (iString.compareTo(jString) == 0)
|
||||||
|
Assert.assertEquals(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(j)),0);
|
||||||
|
else
|
||||||
|
Assert.assertTrue(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(j)) * iString.compareTo(jString) > 0);
|
||||||
|
Assert.assertTrue(BaseUtils.BASES_COMPARATOR.compare(sorted.get(i),sorted.get(j)) <= 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name="baseComparatorData")
|
||||||
|
public Object[][] baseComparatorData() {
|
||||||
|
final int testCount = 10;
|
||||||
|
final int testSizeAverage = 10;
|
||||||
|
final int testSizeDeviation = 10;
|
||||||
|
final int haplotypeSizeAverage = 100;
|
||||||
|
final int haplotypeSizeDeviation = 100;
|
||||||
|
|
||||||
|
final Object[][] result = new Object[testCount][];
|
||||||
|
|
||||||
|
GenomeAnalysisEngine.resetRandomGenerator();
|
||||||
|
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
|
||||||
|
|
||||||
|
for (int i = 0; i < testCount; i++) {
|
||||||
|
final int size = (int) Math.max(0,rnd.nextDouble() * testSizeDeviation + testSizeAverage);
|
||||||
|
final ArrayList<byte[]> bases = new ArrayList<>(size);
|
||||||
|
for (int j = 0; j < size; j++) {
|
||||||
|
final int jSize = (int) Math.max(0,rnd.nextDouble() * haplotypeSizeDeviation + haplotypeSizeAverage);
|
||||||
|
final byte[] b = new byte[jSize];
|
||||||
|
for (int k = 0; k < jSize; k++)
|
||||||
|
b[k] = BaseUtils.baseIndexToSimpleBase(rnd.nextInt(4));
|
||||||
|
bases.add(b);
|
||||||
|
}
|
||||||
|
result[i] = new Object[] { bases };
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -236,4 +236,23 @@ public class UtilsUnitTest extends BaseTest {
|
||||||
public void testTrim(final String s, final int frontTrim, final int backTrim) {
|
public void testTrim(final String s, final int frontTrim, final int backTrim) {
|
||||||
Assert.assertEquals(s.length() - frontTrim - backTrim, Utils.trimArray(s.getBytes(), frontTrim, backTrim).length);
|
Assert.assertEquals(s.length() - frontTrim - backTrim, Utils.trimArray(s.getBytes(), frontTrim, backTrim).length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "equalRange", enabled = true)
|
||||||
|
public void testEqualRange(final byte[] array1, final byte[] array2, final int offset1, final int offset2, final int length, final boolean expected) {
|
||||||
|
Assert.assertEquals(Utils.equalRange(array1,offset1,array2,offset2,length),expected);
|
||||||
|
Assert.assertTrue(Utils.equalRange(array1,offset1,array1,offset1,length));
|
||||||
|
Assert.assertTrue(Utils.equalRange(array2,offset2,array2,offset2,length));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "equalRangeData")
|
||||||
|
public Object[][] equalRangeData() {
|
||||||
|
return new Object[][] {
|
||||||
|
new Object[] { new byte[0] , new byte[0], 0, 0, 0, true},
|
||||||
|
new Object[] { "ABCF".getBytes(), "BC".getBytes(), 1,0,2, true },
|
||||||
|
new Object[] { "ABCF".getBytes(), "".getBytes(), 1,0,0, true },
|
||||||
|
new Object[] { "ABCF".getBytes(), "ACBF".getBytes(), 0,0, 4, false}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue