Added new and improved suffix and node merging algorithms

-- These new algorithms are more powerful than the restricted diamond merging algoriths, in that they can merge nodes with multiple incoming and outgoing edges.  Together the splitter + merger algorithms will correctly merge many more cases than the original headless and tailless diamond merger.
-- Refactored haplotype caller infrastructure into graphs package, code cleanup
-- Cleanup new merging / splitting algorithms, with proper docs and unit tests
-- Fix bug in zipping of linear chains.  Because the multiplicity can be 0, protect ourselves with a max function call
-- Fix BaseEdge.max unit test
-- Add docs and some more unit tests
-- Move error correct from DeBruijnGraph to DeBruijnAssembler
-- Replaced uses of System.out.println with logger.info
-- Don't make multiplicity == 0 nodes look like they should be pruned
-- Fix toString of Path
This commit is contained in:
Mark DePristo 2013-03-25 18:37:25 -04:00
parent 39f2e811e5
commit 66910b036c
30 changed files with 1001 additions and 165 deletions

View File

@ -53,6 +53,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
@ -160,7 +161,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
if ( debugGraphTransformations ) graph.printGraph(new File("unpruned.dot"), pruneFactor);
if ( shouldErrorCorrectKmers() ) {
graph = graph.errorCorrect();
graph = errorCorrect(graph);
if ( debugGraphTransformations ) graph.printGraph(new File("errorCorrected.dot"), pruneFactor);
}
@ -189,6 +190,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.2.zipped.dot"), pruneFactor);
// now go through and prune the graph, removing vertices no longer connected to the reference chain
// IMPORTANT: pruning must occur before we call simplifyGraph, as simplifyGraph adds 0 weight
// edges to maintain graph connectivity.
seqGraph.pruneGraph(pruneFactor);
seqGraph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection();
@ -203,7 +206,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
return null;
seqGraph.removePathsNotConnectedToRef();
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.5.refcleaned.dot"), pruneFactor);
if ( debugGraphTransformations ) seqGraph.printGraph(new File("sequenceGraph.5.final.dot"), pruneFactor);
return seqGraph;
}
@ -321,6 +324,39 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
return true;
}
/**
* Error correct the kmers in this graph, returning a new graph built from those error corrected kmers
* @return an error corrected version of this (freshly allocated graph) or simply this graph if for some reason
* we cannot actually do the error correction
*/
public DeBruijnGraph errorCorrect(final DeBruijnGraph graph) {
final KMerErrorCorrector corrector = new KMerErrorCorrector(graph.getKmerSize(), 1, 1, 5); // TODO -- should be static variables
for( final BaseEdge e : graph.edgeSet() ) {
for ( final byte[] kmer : Arrays.asList(graph.getEdgeSource(e).getSequence(), graph.getEdgeTarget(e).getSequence())) {
// TODO -- need a cleaner way to deal with the ref weight
corrector.addKmer(kmer, e.isRef() ? 1000 : e.getMultiplicity());
}
}
if ( corrector.computeErrorCorrectionMap() ) {
final DeBruijnGraph correctedGraph = new DeBruijnGraph(graph.getKmerSize());
for( final BaseEdge e : graph.edgeSet() ) {
final byte[] source = corrector.getErrorCorrectedKmer(graph.getEdgeSource(e).getSequence());
final byte[] target = corrector.getErrorCorrectedKmer(graph.getEdgeTarget(e).getSequence());
if ( source != null && target != null ) {
correctedGraph.addKmersToGraph(source, target, e.isRef(), e.getMultiplicity());
}
}
return correctedGraph;
} else {
// the error correction wasn't possible, simply return this graph
return graph;
}
}
protected void printGraphs(final List<SeqGraph> graphs) {
final int writeFirstGraphWithSizeSmallerThan = 50;
@ -366,6 +402,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
for( final SeqGraph graph : graphs ) {
for ( final Path<SeqVertex> path : new KBestPaths<SeqVertex>().getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
// logger.info("Found path " + path);
Haplotype h = new Haplotype( path.getBases() );
if( !returnHaplotypes.contains(h) ) {
final Cigar cigar = path.calculateCigar();
@ -421,13 +458,13 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
if( debug ) {
if( returnHaplotypes.size() > 1 ) {
System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
} else {
System.out.println("Found only the reference haplotype in the assembly graph.");
logger.info("Found only the reference haplotype in the assembly graph.");
}
for( final Haplotype h : returnHaplotypes ) {
System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() );
logger.info( h.toString() );
logger.info( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() );
}
}

View File

@ -51,6 +51,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
@ -66,6 +67,7 @@ import java.io.PrintStream;
import java.util.*;
public class GenotypingEngine {
private final static Logger logger = Logger.getLogger(GenotypingEngine.class);
private final boolean DEBUG;
private final boolean USE_FILTERED_READ_MAP_FOR_ANNOTATIONS;
@ -168,15 +170,15 @@ public class GenotypingEngine {
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
int count = 0;
if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
if( DEBUG ) { logger.info("=== Best Haplotypes ==="); }
for( final Haplotype h : haplotypes ) {
// Walk along the alignment and turn any difference from the reference into an event
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
if( !in_GGA_mode ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
if( DEBUG ) {
System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() );
System.out.println( ">> Events = " + h.getEventMap());
logger.info(h.toString());
logger.info("> Cigar = " + h.getCigar());
logger.info(">> Events = " + h.getEventMap());
}
}
@ -261,7 +263,7 @@ public class GenotypingEngine {
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
if( DEBUG ) {
System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
//System.out.println("Event/haplotype allele mapping = " + alleleMapper);
}
@ -500,9 +502,9 @@ public class GenotypingEngine {
if( isBiallelic ) {
final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
if( DEBUG ) {
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
System.out.println("-- " + thisVC);
System.out.println("-- " + nextVC);
logger.info("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
logger.info("-- " + thisVC);
logger.info("-- " + nextVC);
}
if( R2 > MERGE_EVENTS_R2_THRESHOLD ) {
@ -528,7 +530,7 @@ public class GenotypingEngine {
if(!containsStart) { startPosKeySet.remove(thisStart); }
if(!containsNext) { startPosKeySet.remove(nextStart); }
if( DEBUG ) { System.out.println("====> " + mergedVC); }
if( DEBUG ) { logger.info("====> " + mergedVC); }
mapWasUpdated = true;
break; // break out of tree set iteration since it was just updated, start over from the beginning and keep merging events
}

View File

@ -581,7 +581,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
haplotypeBAMWriter.writeReadsAlignedToHaplotypes(haplotypes, paddedReferenceLoc, bestHaplotypes, calledHaplotypes.getCalledHaplotypes(), stratifiedReadMap);
}
if( DEBUG ) { System.out.println("----------------------------------------------------------------------------------"); }
if( DEBUG ) { logger.info("----------------------------------------------------------------------------------"); }
return 1; // One active region was processed during this map call
}
@ -614,7 +614,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
//---------------------------------------------------------------------------------------------------------------
private void finalizeActiveRegion( final ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
if( DEBUG ) { logger.info("Assembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final List<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( activeRegion.getReads() );
activeRegion.clearReads();

View File

@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
@ -62,6 +63,7 @@ import org.broadinstitute.variant.variantcontext.Allele;
import java.util.*;
public class LikelihoodCalculationEngine {
private final static Logger logger = Logger.getLogger(LikelihoodCalculationEngine.class);
private static final double LOG_ONE_HALF = -Math.log10(2.0);
private final byte constantGCP;
@ -256,14 +258,14 @@ public class LikelihoodCalculationEngine {
}
}
if( maxElement == Double.NEGATIVE_INFINITY ) { break; }
if( DEBUG ) { System.out.println("Chose haplotypes " + hap1 + " and " + hap2 + " with diploid likelihood = " + haplotypeLikelihoodMatrix[hap1][hap2]); }
if( DEBUG ) { logger.info("Chose haplotypes " + hap1 + " and " + hap2 + " with diploid likelihood = " + haplotypeLikelihoodMatrix[hap1][hap2]); }
haplotypeLikelihoodMatrix[hap1][hap2] = Double.NEGATIVE_INFINITY;
if( !bestHaplotypesIndexList.contains(hap1) ) { bestHaplotypesIndexList.add(hap1); }
if( !bestHaplotypesIndexList.contains(hap2) ) { bestHaplotypesIndexList.add(hap2); }
}
if( DEBUG ) { System.out.println("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
if( DEBUG ) { logger.info("Chose " + (bestHaplotypesIndexList.size() - 1) + " alternate haplotypes to genotype in all samples."); }
final List<Haplotype> bestHaplotypes = new ArrayList<Haplotype>();
for( final int hIndex : bestHaplotypesIndexList ) {

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import java.io.Serializable;
import java.util.Collection;
@ -69,7 +69,7 @@ public class BaseEdge {
* @param multiplicity the number of observations of this edge
*/
public BaseEdge(final boolean isRef, final int multiplicity) {
if ( multiplicity < 0 ) throw new IllegalArgumentException("multiplicity must be >= 0");
if ( multiplicity < 0 ) throw new IllegalArgumentException("multiplicity must be >= 0 but got " + multiplicity);
this.multiplicity = multiplicity;
this.isRef = isRef;
@ -176,7 +176,7 @@ public class BaseEdge {
}
/**
* Return a new edge that the max of this and edge
* Return a new edge whose multiplicity is the max of this and edge, and isRef is or of this and edge
*
* isRef is simply the or of this and edge
* multiplicity is the max

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
@ -53,7 +53,6 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.jgrapht.EdgeFactory;
import org.jgrapht.graph.DefaultDirectedGraph;
import org.jgrapht.traverse.DepthFirstIterator;
import java.io.File;
import java.io.FileNotFoundException;
@ -221,15 +220,6 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
return null;
}
public boolean isNonRefSink(final T v) {
// TODO -- cleanup logic statements
if ( ! isSink(v) ) return false;
for ( final BaseEdge in : incomingEdgesOf(v) )
if ( in.isRef() )
return false;
return true;
}
/**
* Traverse the graph and get the next reference vertex if it exists
* @param v the current vertex, can be null
@ -400,7 +390,7 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
graphWriter.println("digraph assemblyGraphs {");
for( final BaseEdge edge : edgeSet() ) {
graphWriter.println("\t" + getEdgeSource(edge).toString() + " -> " + getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= pruneFactor ? "style=dotted,color=grey," : "") + "label=\"" + edge.getMultiplicity() + "\"];");
graphWriter.println("\t" + getEdgeSource(edge).toString() + " -> " + getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() > 0 && edge.getMultiplicity() <= pruneFactor ? "style=dotted,color=grey," : "") + "label=\"" + edge.getMultiplicity() + "\"];");
if( edge.isRef() ) {
graphWriter.println("\t" + getEdgeSource(edge).toString() + " -> " + getEdgeTarget(edge).toString() + " [color=red];");
}
@ -414,7 +404,12 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
graphWriter.println("}");
}
protected void cleanNonRefPaths() {
/**
* Remove edges that are connected before the reference source and after the reference sink
*
* Also removes all vertices that are orphaned by this process
*/
public void cleanNonRefPaths() {
if( getReferenceSourceVertex() == null || getReferenceSinkVertex() == null ) {
return;
}
@ -449,7 +444,7 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
*
* @param pruneFactor all edges with multiplicity <= this factor that aren't ref edges will be removed
*/
protected void pruneGraph( final int pruneFactor ) {
public void pruneGraph( final int pruneFactor ) {
final List<BaseEdge> edgesToRemove = new ArrayList<BaseEdge>();
for( final BaseEdge e : edgeSet() ) {
if( e.getMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor
@ -499,7 +494,7 @@ public class BaseGraph<T extends BaseVertex> extends DefaultDirectedGraph<T, Bas
* as it requires vertices to not only be connected by a series of directed edges but also prunes away
* paths that do not also meet eventually with the reference sink vertex
*/
protected void removePathsNotConnectedToRef() {
public void removePathsNotConnectedToRef() {
if ( getReferenceSourceVertex() == null || getReferenceSinkVertex() == null ) {
throw new IllegalStateException("Graph must have ref source and sink vertices");
}

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import java.util.HashSet;
import java.util.Iterator;

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Ensures;

View File

@ -0,0 +1,182 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Requires;
import java.util.*;
/**
* Split a collection of middle nodes in a graph into their shared prefix and suffix values
*
* This code performs the following transformation. Suppose I have a set of vertices V, such
* that each vertex is composed of sequence such that
*
* Vi = prefix + seq_i + suffix
*
* where prefix and suffix are shared sequences across all vertices V. This replaces each
* Vi with three nodes prefix, seq_i, and suffix connected in a simple chain.
*
* This operation can be performed in a very general case, without too much worry about the incoming
* and outgoing edge structure of each Vi. The partner algorithm SharedSequenceMerger can
* put these pieces back together in a smart way that maximizes the sharing of nodes
* while respecting complex connectivity.
*
* User: depristo
* Date: 3/22/13
* Time: 8:31 AM
*/
public class CommonSuffixSplitter {
/**
* Create a new graph that contains the vertices in toMerge with their shared suffix and prefix
* sequences extracted out.
*
*/
public CommonSuffixSplitter() {}
/**
* Simple single-function interface to split and then update a graph
*
* @param graph the graph containing the vertices in toMerge
* @param v The bottom node whose incoming vertices we'd like to split
* @return true if some useful splitting was done, false otherwise
*/
public boolean split(final SeqGraph graph, final SeqVertex v) {
if ( graph == null ) throw new IllegalArgumentException("graph cannot be null");
if ( v == null ) throw new IllegalArgumentException("v cannot be null");
if ( ! graph.vertexSet().contains(v) ) throw new IllegalArgumentException("graph doesn't contain vertex v " + v);
final Collection<SeqVertex> toMerge = graph.incomingVerticesOf(v);
if ( toMerge.size() < 2 )
// Can only split at least 2 vertices
return false;
else if ( ! safeToSplit(graph, v, toMerge) ) {
return false;
} else {
final SeqVertex suffixVTemplate = commonSuffix(toMerge);
if ( suffixVTemplate.isEmpty() ) {
return false;
} else {
final List<BaseEdge> edgesToRemove = new LinkedList<BaseEdge>();
// graph.printGraph(new File("split.pre_" + v.getSequenceString() + "." + counter + ".dot"), 0);
for ( final SeqVertex mid : toMerge ) {
// create my own copy of the suffix
final SeqVertex suffixV = new SeqVertex(suffixVTemplate.getSequence());
graph.addVertex(suffixV);
final SeqVertex prefixV = mid.withoutSuffix(suffixV.getSequence());
final BaseEdge out = graph.outgoingEdgeOf(mid);
final SeqVertex incomingTarget;
if ( prefixV == null ) {
// this node is entirely explained by suffix
incomingTarget = suffixV;
} else {
incomingTarget = prefixV;
graph.addVertex(prefixV);
graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 0));
edgesToRemove.add(out);
}
graph.addEdge(suffixV, graph.getEdgeTarget(out), new BaseEdge(out));
for ( final BaseEdge in : graph.incomingEdgesOf(mid) ) {
graph.addEdge(graph.getEdgeSource(in), incomingTarget, new BaseEdge(in));
edgesToRemove.add(in);
}
}
graph.removeAllVertices(toMerge);
graph.removeAllEdges(edgesToRemove);
// graph.printGraph(new File("split.post_" + v.getSequenceString() + "." + counter++ + ".dot"), 0);
return true;
}
}
}
// private static int counter = 0;
/**
* Can we safely split up the vertices in toMerge?
*
* @param graph a graph
* @param bot a vertex whose incoming vertices we want to split
* @param toMerge the set of vertices we'd be splitting up
* @return true if we can safely split up toMerge
*/
private boolean safeToSplit(final SeqGraph graph, final SeqVertex bot, final Collection<SeqVertex> toMerge) {
for ( final SeqVertex m : toMerge ) {
final Set<BaseEdge> outs = graph.outgoingEdgesOf(m);
if ( m == bot || outs.size() != 1 || ! graph.outgoingVerticesOf(m).contains(bot) )
// m == bot => don't allow cycles in the graph
return false;
}
return true;
}
/**
* Return the longest suffix of bases shared among all provided vertices
*
* For example, if the vertices have sequences AC, CC, and ATC, this would return
* a single C. However, for ACC and TCC this would return CC. And for AC and TG this
* would return null;
*
* @param middleVertices a non-empty set of vertices
* @return a single vertex that contains the common suffix of all middle vertices
*/
@Requires("!middleVertices.isEmpty()")
protected static SeqVertex commonSuffix(final Collection<SeqVertex> middleVertices) {
final List<byte[]> kmers = Utils.getKmers(middleVertices);
final int min = Utils.minKmerLength(kmers);
final int suffixLen = Utils.compSuffixLen(kmers, min);
final byte[] kmer = kmers.get(0);
final byte[] suffix = Arrays.copyOfRange(kmer, kmer.length - suffixLen, kmer.length);
return new SeqVertex(suffix);
}
}

View File

@ -44,9 +44,10 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.KMerErrorCorrector;
import java.util.Arrays;
import java.util.HashMap;
@ -88,39 +89,6 @@ public class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
}
}
/**
* Error correct the kmers in this graph, returning a new graph built from those error corrected kmers
* @return an error corrected version of this (freshly allocated graph) or simply this graph if for some reason
* we cannot actually do the error correction
*/
protected DeBruijnGraph errorCorrect() {
final KMerErrorCorrector corrector = new KMerErrorCorrector(getKmerSize(), 1, 1, 5); // TODO -- should be static variables
for( final BaseEdge e : edgeSet() ) {
for ( final byte[] kmer : Arrays.asList(getEdgeSource(e).getSequence(), getEdgeTarget(e).getSequence())) {
// TODO -- need a cleaner way to deal with the ref weight
corrector.addKmer(kmer, e.isRef() ? 1000 : e.getMultiplicity());
}
}
if ( corrector.computeErrorCorrectionMap() ) {
final DeBruijnGraph correctedGraph = new DeBruijnGraph(getKmerSize());
for( final BaseEdge e : edgeSet() ) {
final byte[] source = corrector.getErrorCorrectedKmer(getEdgeSource(e).getSequence());
final byte[] target = corrector.getErrorCorrectedKmer(getEdgeTarget(e).getSequence());
if ( source != null && target != null ) {
correctedGraph.addKmersToGraph(source, target, e.isRef(), e.getMultiplicity());
}
}
return correctedGraph;
} else {
// the error correction wasn't possible, simply return this graph
return this;
}
}
/**
* Add edge to assembly graph connecting the two kmers
* @param kmer1 the source kmer for the edge
@ -168,7 +136,7 @@ public class DeBruijnGraph extends BaseGraph<DeBruijnVertex> {
* @return a newly allocated SequenceGraph
*/
@Ensures({"result != null"})
protected SeqGraph convertToSequenceGraph() {
public SeqGraph convertToSequenceGraph() {
final SeqGraph seqGraph = new SeqGraph(getKmerSize());
final Map<DeBruijnVertex, SeqVertex> vertexMap = new HashMap<DeBruijnVertex, SeqVertex>();

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Ensures;

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.common.collect.MinMaxPriorityQueue;
import com.google.java.contract.Ensures;

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
@ -67,7 +67,7 @@ import java.util.*;
* Time: 2:34 PM
*
*/
class Path<T extends BaseVertex> {
public class Path<T extends BaseVertex> {
private final static int MAX_CIGAR_ELEMENTS_BEFORE_FAILING_SW = 20;
// the last vertex seen in the path
@ -163,8 +163,9 @@ class Path<T extends BaseVertex> {
boolean first = true;
for ( final T v : getVertices() ) {
if ( first ) {
b.append(" -> ");
first = false;
} else {
b.append(" -> ");
}
b.append(v.getSequenceString());
}

View File

@ -44,10 +44,12 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.apache.commons.lang.ArrayUtils;
import java.io.File;
import java.util.HashSet;
import java.util.Set;
/**
@ -57,6 +59,7 @@ import java.util.Set;
* @since 03/2013
*/
public class SeqGraph extends BaseGraph<SeqVertex> {
private final static boolean PRINT_SIMPLIFY_GRAPHS = false;
private final static int MIN_SUFFIX_TO_MERGE_TAILS = 5;
/**
@ -97,9 +100,16 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
//logger.info("simplifyGraph iteration " + i);
// iterate until we haven't don't anything useful
didSomeWork = false;
//printGraph(new File("simplifyGraph." + i + ".dot"), 0);
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".dot"), 0);
didSomeWork |= new MergeDiamonds().transformUntilComplete();
didSomeWork |= new MergeTails().transformUntilComplete();
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".diamonds_and_tails.dot"), 0);
didSomeWork |= new SplitCommonSuffices().transformUntilComplete();
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".split_suffix.dot"), 0);
didSomeWork |= new MergeCommonSuffices().transformUntilComplete();
if ( PRINT_SIMPLIFY_GRAPHS ) printGraph(new File("simplifyGraph." + i + ".merge_suffix.dot"), 0);
didSomeWork |= new MergeHeadlessIncomingSources().transformUntilComplete();
didSomeWork |= zipLinearChains();
i++;
@ -109,7 +119,7 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
/**
* Zip up all of the simple linear chains present in this graph.
*/
protected boolean zipLinearChains() {
public boolean zipLinearChains() {
boolean foundOne = false;
while( zipOneLinearChain() ) {
// just keep going until zipOneLinearChain says its done
@ -137,13 +147,16 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
final Set<BaseEdge> outEdges = outgoingEdgesOf(outgoingVertex);
final Set<BaseEdge> inEdges = incomingEdgesOf(incomingVertex);
final BaseEdge singleOutEdge = outEdges.isEmpty() ? null : outEdges.iterator().next();
final BaseEdge singleInEdge = inEdges.isEmpty() ? null : inEdges.iterator().next();
if( inEdges.size() == 1 && outEdges.size() == 1 ) {
inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() / 2 ) );
singleInEdge.setMultiplicity( singleInEdge.getMultiplicity() + ( e.getMultiplicity() / 2 ) );
singleOutEdge.setMultiplicity( singleOutEdge.getMultiplicity() + ( e.getMultiplicity() / 2 ) );
} else if( inEdges.size() == 1 ) {
inEdges.iterator().next().setMultiplicity( inEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
singleInEdge.setMultiplicity( Math.max(singleInEdge.getMultiplicity() + ( e.getMultiplicity() - 1 ), 0) );
} else if( outEdges.size() == 1 ) {
outEdges.iterator().next().setMultiplicity( outEdges.iterator().next().getMultiplicity() + ( e.getMultiplicity() - 1 ) );
singleOutEdge.setMultiplicity( Math.max( singleOutEdge.getMultiplicity() + ( e.getMultiplicity() - 1 ), 0) );
}
final SeqVertex addedVertex = new SeqVertex( ArrayUtils.addAll(incomingVertex.getSequence(), outgoingVertex.getSequence()) );
@ -297,6 +310,57 @@ public class SeqGraph extends BaseGraph<SeqVertex> {
}
}
/**
* Merge headless configurations:
*
* Performs the transformation:
*
* { x + S_i + y -> Z }
*
* goes to:
*
* { x -> S_i -> y -> Z }
*
* for all nodes that match this configuration.
*
* Differs from the diamond transform in that no top node is required
*/
protected class MergeCommonSuffices extends VertexBasedTransformer {
@Override
boolean tryToTransform(final SeqVertex bottom) {
return new SharedSequenceMerger().merge(SeqGraph.this, bottom);
}
}
/**
* Merge headless configurations:
*
* Performs the transformation:
*
* { x + S_i + y -> Z }
*
* goes to:
*
* { x -> S_i -> y -> Z }
*
* for all nodes that match this configuration.
*
* Differs from the diamond transform in that no top node is required
*/
protected class SplitCommonSuffices extends VertexBasedTransformer {
final Set<SeqVertex> alreadySplit = new HashSet<SeqVertex>();
@Override
boolean tryToTransform(final SeqVertex bottom) {
if ( alreadySplit.contains(bottom) )
return false;
else {
alreadySplit.add(bottom);
return new CommonSuffixSplitter().split(SeqGraph.this, bottom);
}
}
}
/**
* Merge headless configurations:
*

View File

@ -44,11 +44,10 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.Utils;
import java.util.Arrays;
/**

View File

@ -0,0 +1,138 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.apache.commons.lang.ArrayUtils;
import java.util.*;
/**
* Merges the incoming vertices of a vertex V of a graph
*
* Looks at the vertices that are incoming to V (i.e., have an outgoing edge connecting to V). If
* they all have the same sequence, merges them into the sequence of V, and updates the graph
* as appropriate
*
* User: depristo
* Date: 3/22/13
* Time: 8:31 AM
*/
public class SharedSequenceMerger {
public SharedSequenceMerger() { }
/**
* Attempt to merge the incoming vertices of v
*
* @param graph the graph containing the vertex v
* @param v the vertex whose incoming vertices we want to merge
* @return true if some useful merging was done, false otherwise
*/
public boolean merge(final SeqGraph graph, final SeqVertex v) {
if ( graph == null ) throw new IllegalArgumentException("graph cannot be null");
if ( ! graph.vertexSet().contains(v) ) throw new IllegalArgumentException("graph doesn't contain vertex " + v);
final Set<SeqVertex> prevs = graph.incomingVerticesOf(v);
if ( ! canMerge(graph, v, prevs) )
return false;
else {
// graph.printGraph(new File("csm." + counter + "." + v.getSequenceString() + "_pre.dot"), 0);
final List<BaseEdge> edgesToRemove = new LinkedList<BaseEdge>();
final byte[] prevSeq = prevs.iterator().next().getSequence();
final SeqVertex newV = new SeqVertex(ArrayUtils.addAll(prevSeq, v.getSequence()));
graph.addVertex(newV);
for ( final SeqVertex prev : prevs ) {
for ( final BaseEdge prevIn : graph.incomingEdgesOf(prev) ) {
graph.addEdge(graph.getEdgeSource(prevIn), newV, new BaseEdge(prevIn));
edgesToRemove.add(prevIn);
}
}
for ( final BaseEdge e : graph.outgoingEdgesOf(v) ) {
graph.addEdge(newV, graph.getEdgeTarget(e), new BaseEdge(e));
}
graph.removeAllVertices(prevs);
graph.removeVertex(v);
graph.removeAllEdges(edgesToRemove);
// graph.printGraph(new File("csm." + counter++ + "." + v.getSequenceString() + "_post.dot"), 0);
return true;
}
}
//private static int counter = 0;
/**
* Can we safely merge the incoming vertices of v
*
* @param graph the graph containing v and incomingVertices
* @param v the vertex we want to merge into
* @param incomingVertices the incoming vertices of v
* @return true if we can safely merge incomingVertices
*/
private boolean canMerge(final SeqGraph graph, final SeqVertex v, final Collection<SeqVertex> incomingVertices) {
if ( incomingVertices.isEmpty() )
return false;
final SeqVertex first = incomingVertices.iterator().next();
for ( final SeqVertex prev : incomingVertices) {
if ( ! prev.seqEquals(first) )
return false;
final Collection<SeqVertex> prevOuts = graph.outgoingVerticesOf(prev);
if ( prevOuts.size() != 1 )
return false;
if ( prevOuts.iterator().next() != v )
return false;
}
return true;
}
}

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
@ -266,8 +266,8 @@ public class SharedVertexSequenceSplitter {
min = Math.min(min, v.getSequence().length);
}
final int prefixLen = compPrefixLen(kmers, min);
final int suffixLen = compSuffixLen(kmers, min - prefixLen);
final int prefixLen = Utils.compPrefixLen(kmers, min);
final int suffixLen = Utils.compSuffixLen(kmers, min - prefixLen);
final byte[] kmer = kmers.get(0);
final byte[] prefix = Arrays.copyOfRange(kmer, 0, prefixLen);
@ -275,47 +275,6 @@ public class SharedVertexSequenceSplitter {
return new Pair<SeqVertex, SeqVertex>(new SeqVertex(prefix), new SeqVertex(suffix));
}
/**
* Compute the maximum shared prefix length of list of bytes.
*
* @param listOfBytes a list of bytes with at least one element
* @param minLength the min. length among all byte[] in listOfBytes
* @return the number of shared bytes common at the start of all bytes
*/
@Requires({"listOfBytes.size() >= 1", "minLength >= 0"})
@Ensures("result >= 0")
protected static int compPrefixLen(final List<byte[]> listOfBytes, final int minLength) {
for ( int i = 0; i < minLength; i++ ) {
final byte b = listOfBytes.get(0)[i];
for ( int j = 1; j < listOfBytes.size(); j++ ) {
if ( b != listOfBytes.get(j)[i] )
return i;
}
}
return minLength;
}
/**
* Compute the maximum shared suffix length of list of bytes.
*
* @param listOfBytes a list of bytes with at least one element
* @param minLength the min. length among all byte[] in listOfBytes
* @return the number of shared bytes common at the end of all bytes
*/
@Requires({"listOfBytes.size() >= 1", "minLength >= 0"})
@Ensures("result >= 0")
protected static int compSuffixLen(final List<byte[]> listOfBytes, final int minLength) {
for ( int suffixLen = 0; suffixLen < minLength; suffixLen++ ) {
final byte b = listOfBytes.get(0)[listOfBytes.get(0).length - suffixLen - 1];
for ( int j = 1; j < listOfBytes.size(); j++ ) {
if ( b != listOfBytes.get(j)[listOfBytes.get(j).length - suffixLen - 1] )
return suffixLen;
}
}
return minLength;
}
/**
* Helper function that returns an edge that we should use for splitting
*

View File

@ -0,0 +1,138 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Utility functions used in the graphs package
*
* User: depristo
* Date: 3/25/13
* Time: 9:42 PM
*/
final class Utils {
private Utils() {}
/**
* Compute the maximum shared prefix length of list of bytes.
*
* @param listOfBytes a list of bytes with at least one element
* @param minLength the min. length among all byte[] in listOfBytes
* @return the number of shared bytes common at the start of all bytes
*/
@Requires({"listOfBytes.size() >= 1", "minLength >= 0"})
@Ensures("result >= 0")
protected static int compPrefixLen(final List<byte[]> listOfBytes, final int minLength) {
for ( int i = 0; i < minLength; i++ ) {
final byte b = listOfBytes.get(0)[i];
for ( int j = 1; j < listOfBytes.size(); j++ ) {
if ( b != listOfBytes.get(j)[i] )
return i;
}
}
return minLength;
}
/**
* Compute the maximum shared suffix length of list of bytes.
*
* @param listOfBytes a list of bytes with at least one element
* @param minLength the min. length among all byte[] in listOfBytes
* @return the number of shared bytes common at the end of all bytes
*/
@Requires({"listOfBytes.size() >= 1", "minLength >= 0"})
@Ensures("result >= 0")
protected static int compSuffixLen(final List<byte[]> listOfBytes, final int minLength) {
for ( int suffixLen = 0; suffixLen < minLength; suffixLen++ ) {
final byte b = listOfBytes.get(0)[listOfBytes.get(0).length - suffixLen - 1];
for ( int j = 1; j < listOfBytes.size(); j++ ) {
if ( b != listOfBytes.get(j)[listOfBytes.get(j).length - suffixLen - 1] )
return suffixLen;
}
}
return minLength;
}
/**
* Get the list of kmers as byte[] from the vertices in the graph
*
* @param vertices a collection of vertices
* @return a list of their kmers in order of the iterator on vertices
*/
protected static List<byte[]> getKmers(final Collection<SeqVertex> vertices) {
final List<byte[]> kmers = new ArrayList<byte[]>(vertices.size());
for ( final SeqVertex v : vertices ) {
kmers.add(v.getSequence());
}
return kmers;
}
/**
* Get the minimum length of a collection of byte[]
*
* @param kmers a list of kmers whose .length min we want
* @return the min of the kmers, if kmers is empty the result is 0
*/
protected static int minKmerLength(final Collection<byte[]> kmers) {
if ( kmers == null ) throw new IllegalArgumentException("kmers cannot be null");
if ( kmers.isEmpty() ) return 0;
int min = Integer.MAX_VALUE;
for ( final byte[] kmer : kmers ) {
min = Math.min(min, kmer.length);
}
return min;
}
}

View File

@ -56,6 +56,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;

View File

@ -47,6 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
@ -102,4 +102,20 @@ public class BaseEdgeUnitTest extends BaseTest {
Assert.assertEquals(edges.get(2), e2);
Assert.assertEquals(edges.get(3), e1);
}
@Test
public void testMax() {
for ( final boolean firstIsRef : Arrays.asList(true, false) ) {
for ( final boolean secondIsRef : Arrays.asList(true, false) ) {
for ( final int firstMulti : Arrays.asList(1, 4) ) {
for ( final int secondMulti : Arrays.asList(2, 3) ) {
final BaseEdge expected = new BaseEdge(firstIsRef || secondIsRef, Math.max(firstMulti, secondMulti));
final BaseEdge actual = new BaseEdge(firstIsRef, firstMulti).max(new BaseEdge(secondIsRef, secondMulti));
Assert.assertEquals(actual.getMultiplicity(), expected.getMultiplicity());
Assert.assertEquals(actual.isRef(), expected.isRef());
}
}
}
}
}
}

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
@ -160,7 +160,7 @@ public class BaseGraphUnitTest extends BaseTest {
graph.addEdges(g3, b7, b8, b7); // cycle is bad
graph.addEdges(g3, b9, b9); // self-cycle is bad
final boolean debug = true;
final boolean debug = false;
if ( debug ) good.printGraph(new File("expected.dot"), 0);
if ( debug ) graph.printGraph(new File("bad.dot"), 0);
graph.removePathsNotConnectedToRef();
@ -169,6 +169,63 @@ public class BaseGraphUnitTest extends BaseTest {
Assert.assertTrue(BaseGraph.graphEquals(graph, good), "Failed to remove exactly the bad nodes");
}
@Test
public void testRemoveVerticesNotConnectedToRefRegardlessOfEdgeDirection() throws Exception {
final SeqGraph graph = new SeqGraph();
SeqVertex src = new SeqVertex("A");
SeqVertex end = new SeqVertex("A");
SeqVertex g1 = new SeqVertex("C");
SeqVertex g2 = new SeqVertex("G");
SeqVertex g3 = new SeqVertex("T");
SeqVertex g4 = new SeqVertex("AA");
SeqVertex g5 = new SeqVertex("AA");
SeqVertex g6 = new SeqVertex("AA");
SeqVertex g8 = new SeqVertex("AA");
SeqVertex g7 = new SeqVertex("AA");
SeqVertex gPrev = new SeqVertex("AA");
SeqVertex gPrev1 = new SeqVertex("AA");
SeqVertex gPrev2 = new SeqVertex("AA");
SeqVertex gAfter = new SeqVertex("AA");
SeqVertex gAfter1 = new SeqVertex("AA");
SeqVertex gAfter2 = new SeqVertex("AA");
SeqVertex b1 = new SeqVertex("CC");
SeqVertex b2 = new SeqVertex("GG");
SeqVertex b3 = new SeqVertex("TT");
SeqVertex b4 = new SeqVertex("AAA");
SeqVertex b5 = new SeqVertex("CCC");
SeqVertex b6 = new SeqVertex("GGG");
graph.addVertices(src, end, g1, g2, g3, g4, g5, g6, g7, g8, gPrev, gPrev1, gPrev2, gAfter, gAfter1, gAfter2);
graph.addEdges(new BaseEdge(true, 1), src, g1, g2, g4, end);
graph.addEdges(src, g1, g5, g6, g7, end);
graph.addEdges(src, g1, g5, g8, g7, end);
graph.addEdges(src, g1, g3, end);
// these should be kept, but are in the wrong direction
graph.addEdges(gPrev, src);
graph.addEdges(gPrev1, gPrev2, src);
graph.addEdges(end, gAfter);
graph.addEdges(end, gAfter1, gAfter2);
// the current state of the graph is the good one
final SeqGraph good = (SeqGraph)graph.clone();
// now add the bads to the graph
graph.addVertices(b1, b2, b3, b4, b5, b6);
graph.addEdges(b2, b3); // b2 -> b3
graph.addEdges(b4, b5, b4); // cycle
graph.addEdges(b6, b6); // isolated self cycle
final boolean debug = false;
if ( debug ) good.printGraph(new File("expected.dot"), 0);
if ( debug ) graph.printGraph(new File("bad.dot"), 0);
graph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection();
if ( debug ) graph.printGraph(new File("actual.dot"), 0);
Assert.assertTrue(BaseGraph.graphEquals(graph, good), "Failed to remove exactly the bad nodes");
}
@Test
public void testPrintEmptyGraph() throws Exception {
final File tmp = File.createTempFile("tmp", "dot");

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;

View File

@ -0,0 +1,160 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.util.*;
public class CommonSuffixMergerUnitTest extends BaseTest {
private final static boolean PRINT_GRAPHS = true;
@DataProvider(name = "CompleteCycleData")
public Object[][] makeCompleteCycleData() {
return makeSplitMergeData(-1);
}
public static class SplitMergeData {
final SeqGraph graph;
final SeqVertex v;
final String commonSuffix;
public SplitMergeData(SeqGraph graph, SeqVertex v, String commonSuffix) {
this.graph = graph;
this.v = v;
this.commonSuffix = commonSuffix;
}
}
public static Object[][] makeSplitMergeData(final int maxTests) {
List<Object[]> tests = new ArrayList<Object[]>();
final List<String> bases = Arrays.asList("A", "C", "G", "T");
for ( final String commonSuffix : Arrays.asList("", "A", "AT") ) {
for ( final int nBots : Arrays.asList(0, 1, 2) ) {
for ( final int nMids : Arrays.asList(1, 2, 3) ) {
for ( int nTops = 0; nTops < nMids; nTops++ ) {
for ( int nTopConnections = 1; nTopConnections <= nMids; nTopConnections++ ) {
int multi = 1;
final SeqGraph graph = new SeqGraph();
final SeqVertex v = new SeqVertex("GGGG");
graph.addVertex(v);
final LinkedList<SeqVertex> tops = new LinkedList<SeqVertex>();
final LinkedList<SeqVertex> mids = new LinkedList<SeqVertex>();
for ( int i = 0; i < nMids; i++) {
final SeqVertex mid = new SeqVertex(bases.get(i) + commonSuffix);
graph.addVertex(mid);
graph.addEdge(mid, v, new BaseEdge(i == 0, multi++));
mids.add(mid);
tops.add(new SeqVertex(bases.get(i)));
}
graph.addVertices(tops);
for ( final SeqVertex t : tops ) {
for ( int i = 0; i < nTopConnections; i++ ) {
graph.addEdge(t, mids.get(i), new BaseEdge(i == 0, multi++));
}
}
for ( int i = 0; i < nBots; i++ ) {
final SeqVertex bot = new SeqVertex(bases.get(i));
graph.addVertex(bot);
graph.addEdge(v, bot, new BaseEdge(i == 0, multi++));
}
tests.add(new Object[]{new SplitMergeData(graph, v, commonSuffix)});
}
}
}
}
}
final List<Object[]> toUse = maxTests == -1 ? tests : tests.subList(0, Math.min(tests.size(), maxTests));
return toUse.toArray(new Object[][]{});
}
public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) {
try {
final Set<String> haplotypes = new HashSet<String>();
final List<Path<SeqVertex>> originalPaths = new KBestPaths<SeqVertex>().getKBestPaths(original);
for ( final Path<SeqVertex> path : originalPaths )
haplotypes.add(new String(path.getBases()));
final List<Path<SeqVertex>> splitPaths = new KBestPaths<SeqVertex>().getKBestPaths(actual);
for ( final Path<SeqVertex> path : splitPaths ) {
final String h = new String(path.getBases());
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
}
if ( splitPaths.size() == originalPaths.size() ) {
for ( int i = 0; i < originalPaths.size(); i++ ) {
Assert.assertTrue(splitPaths.get(i).equalSequence(originalPaths.get(i)), "Paths not equal " + splitPaths.get(i) + " vs. original " + originalPaths.get(i));
}
}
} catch ( AssertionError e ) {
if ( PRINT_GRAPHS ) original.printGraph(new File(String.format("%s.original.dot", name, actual.vertexSet().size())), 0);
if ( PRINT_GRAPHS ) actual.printGraph(new File(String.format("%s.actual.dot", name, actual.vertexSet().size())), 0);
throw e;
}
}
@Test(dataProvider = "CompleteCycleData")
public void testMerging(final SplitMergeData data) {
final SeqGraph original = (SeqGraph)data.graph.clone();
final SharedSequenceMerger splitter = new SharedSequenceMerger();
splitter.merge(data.graph, data.v);
assertSameHaplotypes(String.format("suffixMerge.%s.%d", data.commonSuffix, data.graph.vertexSet().size()), data.graph, original);
}
}

View File

@ -0,0 +1,113 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
public class CommonSuffixSplitterUnitTest extends BaseTest {
@DataProvider(name = "SplitData")
public Object[][] makeSplitData() {
return CommonSuffixMergerUnitTest.makeSplitMergeData(-1);
}
@Test(dataProvider = "SplitData")
public void testSplit(final CommonSuffixMergerUnitTest.SplitMergeData data) {
final boolean expectedMerge = ! data.commonSuffix.isEmpty() && data.graph.inDegreeOf(data.v) > 1;
final SeqGraph original = (SeqGraph)data.graph.clone();
// original.printGraph(new File("original.dot"), 0);
final CommonSuffixSplitter splitter = new CommonSuffixSplitter();
final boolean succeed = splitter.split(data.graph, data.v);
// data.graph.printGraph(new File("actual.dot"), 0);
Assert.assertEquals(succeed, expectedMerge, "Not excepted merge success/fail result");
if ( succeed ) {
Assert.assertEquals(data.graph.incomingVerticesOf(data.v).iterator().next().getSequenceString(), data.commonSuffix, "Common suffix not computed correctly");
}
CommonSuffixMergerUnitTest.assertSameHaplotypes(String.format("suffixSplit.%s.%d", data.commonSuffix, data.graph.vertexSet().size()), data.graph, original);
}
@Test
public void testSplitPrevHaveMultipleEdges() {
final SeqGraph original = new SeqGraph();
final SeqVertex v1 = new SeqVertex("A");
final SeqVertex v2 = new SeqVertex("A");
final SeqVertex v3 = new SeqVertex("A");
final SeqVertex v4 = new SeqVertex("A");
original.addVertices(v1, v2, v3, v4);
original.addEdges(v1, v3);
Assert.assertFalse(new CommonSuffixSplitter().split(original, v3), "Cannot split graph with only one vertex");
original.addEdges(v2, v3);
original.addEdges(v2, v4);
Assert.assertFalse(new CommonSuffixSplitter().split(original, v3), "Cannot split graph with multiple outgoing edges from middle nodes");
}
@Test
public void testSplitNoCycles() {
final SeqGraph original = new SeqGraph();
final SeqVertex v1 = new SeqVertex("A");
final SeqVertex v2 = new SeqVertex("C");
final SeqVertex v3 = new SeqVertex("C");
final SeqVertex v4 = new SeqVertex("G");
original.addVertices(v1, v2, v3, v4);
original.addEdges(v1, v3, v4);
original.addEdges(v1, v2, v4);
Assert.assertTrue(new CommonSuffixSplitter().split((SeqGraph)original.clone(), v4), "Should be able to split pre-cycle graph");
original.addEdges(v4, v4);
Assert.assertFalse(new CommonSuffixSplitter().split(original, v4), "Cannot split graph with a cycle of the bottom list");
}
}

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;

View File

@ -1,50 +1,50 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
@ -315,12 +315,16 @@ public class SeqGraphUnitTest extends BaseTest {
public void testMerging(final SeqGraph graph, final SeqGraph expected) {
final SeqGraph merged = (SeqGraph)graph.clone();
merged.simplifyGraph(1);
// if ( ! SeqGraph.graphEquals(merged, expected) ) {
// graph.printGraph(new File("graph.dot"), 0);
// merged.printGraph(new File("merged.dot"), 0);
// expected.printGraph(new File("expected.dot"), 0);
// }
Assert.assertTrue(SeqGraph.graphEquals(merged, expected));
try {
Assert.assertTrue(SeqGraph.graphEquals(merged, expected));
} catch (AssertionError e) {
// if ( ! SeqGraph.graphEquals(merged, expected) ) {
// graph.printGraph(new File("graph.dot"), 0);
// merged.printGraph(new File("merged.dot"), 0);
// expected.printGraph(new File("expected.dot"), 0);
// }
throw e;
}
}
// A -> ACT -> C [non-ref]

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
@ -52,7 +52,6 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class SeqVertexUnitTest extends BaseTest {

View File

@ -44,7 +44,7 @@
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
@ -98,10 +98,10 @@ public class SharedVertexSequenceSplitterUnitTest extends BaseTest {
min = Math.min(min, s.length());
}
final int actualPrefixLen = SharedVertexSequenceSplitter.compPrefixLen(bytes, min);
final int actualPrefixLen = org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.Utils.compPrefixLen(bytes, min);
Assert.assertEquals(actualPrefixLen, expectedPrefixLen, "Failed prefix test");
final int actualSuffixLen = SharedVertexSequenceSplitter.compSuffixLen(bytes, min - actualPrefixLen);
final int actualSuffixLen = org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.Utils.compSuffixLen(bytes, min - actualPrefixLen);
Assert.assertEquals(actualSuffixLen, expectedSuffixLen, "Failed suffix test");
}