HaplotypeCaller now has the ability to only consider the best N haplotypes for genotyping
-- Added a -dontGenotype mode for testing assembly efficiency -- However, it looks like this has a very negative impact on the quality of the results, so the code should be deleted
This commit is contained in:
parent
a8fb26bf01
commit
ffea6dd95f
|
|
@ -52,6 +52,7 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Haplotype;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -73,6 +74,7 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class DeBruijnAssembler extends LocalAssemblyEngine {
|
public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
|
private final static Logger logger = Logger.getLogger(DeBruijnAssembler.class);
|
||||||
|
|
||||||
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
|
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
|
||||||
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
|
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
|
||||||
|
|
@ -85,18 +87,20 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
private static final double SW_GAP = -22.0; //-1.0-1.0/3.0;
|
private static final double SW_GAP = -22.0; //-1.0-1.0/3.0;
|
||||||
private static final double SW_GAP_EXTEND = -1.2; //-1.0/.0;
|
private static final double SW_GAP_EXTEND = -1.2; //-1.0/.0;
|
||||||
|
|
||||||
private final boolean DEBUG;
|
private final boolean debug;
|
||||||
private final PrintStream GRAPH_WRITER;
|
private final PrintStream graphWriter;
|
||||||
private final List<DeBruijnAssemblyGraph> graphs = new ArrayList<DeBruijnAssemblyGraph>();
|
private final List<DeBruijnAssemblyGraph> graphs = new ArrayList<DeBruijnAssemblyGraph>();
|
||||||
private final int MIN_KMER;
|
private final int minKmer;
|
||||||
|
private final int maxHaplotypesToConsider;
|
||||||
|
|
||||||
private int PRUNE_FACTOR = 2;
|
private int PRUNE_FACTOR = 2;
|
||||||
|
|
||||||
public DeBruijnAssembler(final boolean debug, final PrintStream graphWriter, final int minKmer) {
|
public DeBruijnAssembler(final boolean debug, final PrintStream graphWriter, final int minKmer, final int maxHaplotypesToConsider) {
|
||||||
super();
|
super();
|
||||||
DEBUG = debug;
|
this.debug = debug;
|
||||||
GRAPH_WRITER = graphWriter;
|
this.graphWriter = graphWriter;
|
||||||
MIN_KMER = minKmer;
|
this.minKmer = minKmer;
|
||||||
|
this.maxHaplotypesToConsider = maxHaplotypesToConsider;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -123,7 +127,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
createDeBruijnGraphs( activeRegion.getReads(), refHaplotype );
|
createDeBruijnGraphs( activeRegion.getReads(), refHaplotype );
|
||||||
|
|
||||||
// print the graphs if the appropriate debug option has been turned on
|
// print the graphs if the appropriate debug option has been turned on
|
||||||
if( GRAPH_WRITER != null ) {
|
if( graphWriter != null ) {
|
||||||
printGraphs();
|
printGraphs();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -136,11 +140,12 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
graphs.clear();
|
graphs.clear();
|
||||||
|
|
||||||
final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1;
|
final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1;
|
||||||
if( maxKmer < MIN_KMER ) { return; } // Reads are too small for assembly so don't try to create any assembly graphs
|
if( maxKmer < minKmer) { return; } // Reads are too small for assembly so don't try to create any assembly graphs
|
||||||
|
|
||||||
// create the graph for each possible kmer
|
// create the graph for each possible kmer
|
||||||
for( int kmer = maxKmer; kmer >= MIN_KMER; kmer -= GRAPH_KMER_STEP ) {
|
for( int kmer = maxKmer; kmer >= minKmer; kmer -= GRAPH_KMER_STEP ) {
|
||||||
final DeBruijnAssemblyGraph graph = createGraphFromSequences( reads, kmer, refHaplotype, DEBUG );
|
//if ( debug ) logger.info("Creating de Bruijn graph for " + kmer + " kmer using " + reads.size() + " reads");
|
||||||
|
final DeBruijnAssemblyGraph graph = createGraphFromSequences( reads, kmer, refHaplotype, debug);
|
||||||
if( graph != null ) { // graphs that fail during creation ( for example, because there are cycles in the reference graph ) will show up here as a null graph object
|
if( graph != null ) { // graphs that fail during creation ( for example, because there are cycles in the reference graph ) will show up here as a null graph object
|
||||||
// do a series of steps to clean up the raw assembly graph to make it analysis-ready
|
// do a series of steps to clean up the raw assembly graph to make it analysis-ready
|
||||||
pruneGraph(graph, PRUNE_FACTOR);
|
pruneGraph(graph, PRUNE_FACTOR);
|
||||||
|
|
@ -320,22 +325,22 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void printGraphs() {
|
protected void printGraphs() {
|
||||||
GRAPH_WRITER.println("digraph assemblyGraphs {");
|
graphWriter.println("digraph assemblyGraphs {");
|
||||||
for( final DeBruijnAssemblyGraph graph : graphs ) {
|
for( final DeBruijnAssemblyGraph graph : graphs ) {
|
||||||
for( final DeBruijnEdge edge : graph.edgeSet() ) {
|
for( final DeBruijnEdge edge : graph.edgeSet() ) {
|
||||||
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
|
if( edge.getMultiplicity() > PRUNE_FACTOR ) {
|
||||||
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\""+ edge.getMultiplicity() +"\"") + "];");
|
graphWriter.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [" + (edge.getMultiplicity() <= PRUNE_FACTOR ? "style=dotted,color=grey" : "label=\"" + edge.getMultiplicity() + "\"") + "];");
|
||||||
}
|
}
|
||||||
if( edge.isRef() ) {
|
if( edge.isRef() ) {
|
||||||
GRAPH_WRITER.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
|
graphWriter.println("\t" + graph.getEdgeSource(edge).toString() + " -> " + graph.getEdgeTarget(edge).toString() + " [color=red];");
|
||||||
}
|
}
|
||||||
if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
|
if( !edge.isRef() && edge.getMultiplicity() <= PRUNE_FACTOR ) { System.out.println("Graph pruning warning!"); }
|
||||||
}
|
}
|
||||||
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
for( final DeBruijnVertex v : graph.vertexSet() ) {
|
||||||
GRAPH_WRITER.println("\t" + v.toString() + " [label=\"" + new String(graph.getAdditionalSequence(v)) + "\"]");
|
graphWriter.println("\t" + v.toString() + " [label=\"" + new String(graph.getAdditionalSequence(v)) + "\"]");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
GRAPH_WRITER.println("}");
|
graphWriter.println("}");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"})
|
@Requires({"refWithPadding.length > refHaplotype.getBases().length", "refLoc.containsP(activeRegionWindow)"})
|
||||||
|
|
@ -343,6 +348,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
private List<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
|
private List<Haplotype> findBestPaths( final Haplotype refHaplotype, final byte[] refWithPadding, final GenomeLoc refLoc, final List<VariantContext> activeAllelesToGenotype, final GenomeLoc activeRegionWindow ) {
|
||||||
|
|
||||||
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
|
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
|
||||||
|
// TODO -- this use of an array with contains lower may be a performance problem returning in an O(N^2) algorithm
|
||||||
final List<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
|
final List<Haplotype> returnHaplotypes = new ArrayList<Haplotype>();
|
||||||
refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - refLoc.getStart());
|
refHaplotype.setAlignmentStartHapwrtRef(activeRegionWindow.getStart() - refLoc.getStart());
|
||||||
final Cigar c = new Cigar();
|
final Cigar c = new Cigar();
|
||||||
|
|
@ -383,7 +389,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
}
|
}
|
||||||
if( !returnHaplotypes.contains(h) ) {
|
if( !returnHaplotypes.contains(h) ) {
|
||||||
h.setAlignmentStartHapwrtRef(activeRegionStart);
|
h.setAlignmentStartHapwrtRef(activeRegionStart);
|
||||||
h.setCigar( leftAlignedCigar );
|
h.setCigar(leftAlignedCigar);
|
||||||
|
h.setScore(path.getScore());
|
||||||
returnHaplotypes.add(h);
|
returnHaplotypes.add(h);
|
||||||
|
|
||||||
// for GGA mode, add the desired allele into the haplotype if it isn't already present
|
// for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||||
|
|
@ -409,18 +416,39 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( DEBUG ) {
|
final List<Haplotype> finalHaplotypes = selectHighestScoringHaplotypes(returnHaplotypes);
|
||||||
if( returnHaplotypes.size() > 1 ) {
|
if ( finalHaplotypes.size() < returnHaplotypes.size() )
|
||||||
System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes to evaluate every read against.");
|
logger.info("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
|
||||||
|
|
||||||
|
if( debug ) {
|
||||||
|
if( finalHaplotypes.size() > 1 ) {
|
||||||
|
System.out.println("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
|
||||||
} else {
|
} else {
|
||||||
System.out.println("Found only the reference haplotype in the assembly graph.");
|
System.out.println("Found only the reference haplotype in the assembly graph.");
|
||||||
}
|
}
|
||||||
for( final Haplotype h : returnHaplotypes ) {
|
for( final Haplotype h : finalHaplotypes ) {
|
||||||
System.out.println( h.toString() );
|
System.out.println( h.toString() );
|
||||||
System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() );
|
System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return returnHaplotypes;
|
|
||||||
|
return finalHaplotypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Select the best scoring haplotypes among all present, returning no more than maxHaplotypesToConsider
|
||||||
|
*
|
||||||
|
* @param haplotypes a list of haplotypes to consider
|
||||||
|
* @return a sublist of the best haplotypes, with size() <= maxHaplotypesToConsider
|
||||||
|
*/
|
||||||
|
private List<Haplotype> selectHighestScoringHaplotypes(final List<Haplotype> haplotypes) {
|
||||||
|
if ( haplotypes.size() <= maxHaplotypesToConsider )
|
||||||
|
return haplotypes;
|
||||||
|
else {
|
||||||
|
final List<Haplotype> sorted = new ArrayList<Haplotype>(haplotypes);
|
||||||
|
Collections.sort(sorted, new Haplotype.ScoreComparator());
|
||||||
|
return sorted.subList(0, maxHaplotypesToConsider);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||||
|
import org.broadinstitute.sting.gatk.downsampling.DownsamplingUtils;
|
||||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||||
|
|
@ -205,6 +206,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
|
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
|
||||||
protected int minKmer = 11;
|
protected int minKmer = 11;
|
||||||
|
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="maxHaplotypesToConsider", shortName="maxHaplotypesToConsider", doc="Maximum number of haplotypes to consider in the likelihood calculation. Setting this number too high can have dramatic performance implications", required = false)
|
||||||
|
protected int maxHaplotypesToConsider = 100000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling
|
* If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling
|
||||||
* when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the
|
* when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the
|
||||||
|
|
@ -227,6 +232,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
@Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false)
|
@Argument(fullName="justDetermineActiveRegions", shortName="justDetermineActiveRegions", doc = "If specified, the HC won't actually do any assembly or calling, it'll just run the upfront active region determination code. Useful for benchmarking and scalability testing", required=false)
|
||||||
protected boolean justDetermineActiveRegions = false;
|
protected boolean justDetermineActiveRegions = false;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="dontGenotype", shortName="dontGenotype", doc = "If specified, the HC will do any assembly but won't do calling. Useful for benchmarking and scalability testing", required=false)
|
||||||
|
protected boolean dontGenotype = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
|
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
|
||||||
* dbSNP is not used in any way for the calculations themselves.
|
* dbSNP is not used in any way for the calculations themselves.
|
||||||
|
|
@ -296,6 +305,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
// reference base padding size
|
// reference base padding size
|
||||||
private static final int REFERENCE_PADDING = 500;
|
private static final int REFERENCE_PADDING = 500;
|
||||||
|
|
||||||
|
private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an argument
|
||||||
|
private final static int minReadsPerAlignmentStart = 5; // TODO -- should be an argument
|
||||||
|
|
||||||
// bases with quality less than or equal to this value are trimmed off the tails of the reads
|
// bases with quality less than or equal to this value are trimmed off the tails of the reads
|
||||||
private static final byte MIN_TAIL_QUALITY = 20;
|
private static final byte MIN_TAIL_QUALITY = 20;
|
||||||
|
|
||||||
|
|
@ -374,7 +386,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
assemblyEngine = new DeBruijnAssembler( DEBUG, graphWriter, minKmer );
|
assemblyEngine = new DeBruijnAssembler( DEBUG, graphWriter, minKmer, maxHaplotypesToConsider );
|
||||||
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
||||||
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
|
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
|
||||||
|
|
||||||
|
|
@ -514,6 +526,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
// sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM
|
// sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM
|
||||||
Collections.sort( haplotypes, new Haplotype.HaplotypeBaseComparator() );
|
Collections.sort( haplotypes, new Haplotype.HaplotypeBaseComparator() );
|
||||||
|
|
||||||
|
if (dontGenotype)
|
||||||
|
return 1;
|
||||||
|
|
||||||
// evaluate each sample's reads against all haplotypes
|
// evaluate each sample's reads against all haplotypes
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, splitReadsBySample( activeRegion.getReads() ) );
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, splitReadsBySample( activeRegion.getReads() ) );
|
||||||
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
|
final Map<String, List<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
|
||||||
|
|
@ -575,7 +590,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
//
|
//
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
private void finalizeActiveRegion( final ActiveRegion activeRegion ) {
|
||||||
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
|
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
|
||||||
final List<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
|
final List<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
|
||||||
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( activeRegion.getReads() );
|
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( activeRegion.getReads() );
|
||||||
|
|
@ -599,7 +614,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
activeRegion.addAll(ReadUtils.sortReadsByCoordinate(readsToUse));
|
|
||||||
|
activeRegion.addAll(DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart));
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||||
|
|
|
||||||
|
|
@ -41,12 +41,12 @@ import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class Haplotype extends Allele {
|
public class Haplotype extends Allele {
|
||||||
|
|
||||||
private GenomeLoc genomeLocation = null;
|
private GenomeLoc genomeLocation = null;
|
||||||
private Map<Integer, VariantContext> eventMap = null;
|
private Map<Integer, VariantContext> eventMap = null;
|
||||||
private Cigar cigar;
|
private Cigar cigar;
|
||||||
private int alignmentStartHapwrtRef;
|
private int alignmentStartHapwrtRef;
|
||||||
private Event artificialEvent = null;
|
private Event artificialEvent = null;
|
||||||
|
private double score = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main constructor
|
* Main constructor
|
||||||
|
|
@ -259,4 +259,34 @@ public class Haplotype extends Allele {
|
||||||
this.pos = pos;
|
this.pos = pos;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the score (an estimate of the support) of this haplotype
|
||||||
|
* @return a double, where higher values are better
|
||||||
|
*/
|
||||||
|
public double getScore() {
|
||||||
|
return this.isReference() ? Double.MAX_VALUE : score;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the score (an estimate of the support) of this haplotype.
|
||||||
|
*
|
||||||
|
* Note that if this is the reference haplotype it is always given Double.MAX_VALUE score
|
||||||
|
*
|
||||||
|
* @param score a double, where higher values are better
|
||||||
|
*/
|
||||||
|
public void setScore(double score) {
|
||||||
|
this.score = this.isReference() ? Double.MAX_VALUE : score;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A comparator that sorts haplotypes in decreasing order of score, so that the best supported
|
||||||
|
* haplotypes are at the top
|
||||||
|
*/
|
||||||
|
public static class ScoreComparator implements Comparator<Haplotype> {
|
||||||
|
@Override
|
||||||
|
public int compare(Haplotype o1, Haplotype o2) {
|
||||||
|
return -1 * Double.valueOf(o1.getScore()).compareTo(o2.getScore());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue