Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-08-21 10:35:23 -04:00
commit d0644b3565
14 changed files with 78 additions and 48 deletions

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.Serializable;
import java.util.Comparator;
/**
* Created by IntelliJ IDEA.
* User: ebanks
@ -9,7 +12,7 @@ import org.jgrapht.graph.DefaultDirectedGraph;
*/
// simple edge class for connecting nodes in the graph
public class DeBruijnEdge implements Comparable<DeBruijnEdge> {
public class DeBruijnEdge {
private int multiplicity;
private boolean isRef;
@ -53,8 +56,10 @@ public class DeBruijnEdge implements Comparable<DeBruijnEdge> {
return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge)));
}
@Override
public int compareTo( final DeBruijnEdge that ) {
return this.multiplicity - that.multiplicity;
public static class EdgeWeightComparator implements Comparator<DeBruijnEdge>, Serializable {
@Override
public int compare(final DeBruijnEdge edge1, final DeBruijnEdge edge2) {
return edge1.multiplicity - edge2.multiplicity;
}
}
}

View File

@ -14,7 +14,7 @@ public class DeBruijnVertex {
public final int kmer;
public DeBruijnVertex( final byte[] sequence, final int kmer ) {
this.sequence = sequence;
this.sequence = sequence.clone();
this.kmer = kmer;
}
@ -37,7 +37,7 @@ public class DeBruijnVertex {
}
public byte[] getSequence() {
return sequence;
return sequence.clone();
}
public byte[] getSuffix() {

View File

@ -4,6 +4,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.Serializable;
import java.util.*;
/**
@ -76,13 +77,15 @@ public class KBestPaths {
}
}
protected static class PathComparatorTotalScore implements Comparator<Path> {
protected static class PathComparatorTotalScore implements Comparator<Path>, Serializable {
@Override
public int compare(final Path path1, final Path path2) {
return path1.totalScore - path2.totalScore;
}
}
//protected static class PathComparatorLowestEdge implements Comparator<Path> {
//protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
// @Override
// public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge;
// }
@ -124,7 +127,7 @@ public class KBestPaths {
// recursively run DFS
final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
Collections.sort(edgeArrayList);
Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator());
Collections.reverse(edgeArrayList);
for ( final DeBruijnEdge edge : edgeArrayList ) {
// make sure the edge is not already in the path

View File

@ -78,10 +78,10 @@ public class LikelihoodCalculationEngine {
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
// for each sample's reads
for( final String sample : perSampleReadList.keySet() ) {
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
//if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
// evaluate the likelihood of the reads given those haplotypes
computeReadLikelihoods( haplotypes, perSampleReadList.get(sample), sample, matchMetricArray, XMetricArray, YMetricArray );
computeReadLikelihoods( haplotypes, sampleEntry.getValue(), sampleEntry.getKey(), matchMetricArray, XMetricArray, YMetricArray );
}
}

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -31,7 +32,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
*/
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>();
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
@Override
@ -110,18 +111,18 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// add these blocks of work to the work queue
// band-pass filter the list of isActive probabilities and turn into active regions
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
final List<org.broadinstitute.sting.utils.activeregion.ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
// add active regions to queue of regions to process
// first check if can merge active regions over shard boundaries
if( !activeRegions.isEmpty() ) {
if( !workQueue.isEmpty() ) {
final org.broadinstitute.sting.utils.activeregion.ActiveRegion last = workQueue.getLast();
final org.broadinstitute.sting.utils.activeregion.ActiveRegion first = activeRegions.get(0);
final ActiveRegion last = workQueue.getLast();
final ActiveRegion first = activeRegions.get(0);
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
workQueue.removeLast();
activeRegions.remove(first);
workQueue.add( new org.broadinstitute.sting.utils.activeregion.ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
}
}
workQueue.addAll( activeRegions );

View File

@ -48,6 +48,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.Serializable;
import java.util.*;
/**
@ -111,7 +112,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return map;
}
private static class HaplotypeComparator implements Comparator<Haplotype> {
private static class HaplotypeComparator implements Comparator<Haplotype>, Serializable {
public int compare(Haplotype a, Haplotype b) {
if (a.getQualitySum() < b.getQualitySum())

View File

@ -42,7 +42,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
protected RecalibrationTables recalibrationTables;
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
this.covariates = covariates;
this.covariates = covariates.clone();
this.recalibrationTables = recalibrationTables;
}

View File

@ -29,10 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.io.*;
import java.util.*;
/**
@ -41,7 +38,7 @@ import java.util.*;
* Date: Mar 10, 2011
*/
public class Tranche implements Comparable<Tranche> {
public class Tranche {
private static final int CURRENT_VERSION = 5;
public double ts, minVQSLod, knownTiTv, novelTiTv;
@ -83,10 +80,14 @@ public class Tranche implements Comparable<Tranche> {
return accessibleTruthSites > 0 ? callsAtTruthSites / (1.0*accessibleTruthSites) : 0.0;
}
public int compareTo(Tranche other) {
return Double.compare(this.ts, other.ts);
public static class TrancheTruthSensitivityComparator implements Comparator<Tranche>, Serializable {
@Override
public int compare(final Tranche tranche1, final Tranche tranche2) {
return Double.compare(tranche1.ts, tranche2.ts);
}
}
@Override
public String toString() {
return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.4f) novel=(%d @ %.4f) truthSites(%d accessible, %d called), name=%s]",
ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, name);
@ -102,7 +103,7 @@ public class Tranche implements Comparable<Tranche> {
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final PrintStream stream = new PrintStream(bytes);
Collections.sort(tranches);
Collections.sort( tranches, new TrancheTruthSensitivityComparator() );
stream.println("# Variant quality score tranches file");
stream.println("# Version number " + CURRENT_VERSION);
@ -183,7 +184,7 @@ public class Tranche implements Comparable<Tranche> {
}
}
Collections.sort(tranches);
Collections.sort( tranches, new TrancheTruthSensitivityComparator() );
return tranches;
} catch( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(f, e);

View File

@ -146,7 +146,7 @@ public class TrancheManager {
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));
Collections.sort(data);
Collections.sort( data, new VariantDatum.VariantDatumLODComparator() );
metric.calculateRunningMetric(data);
if ( debugFile != null) { writeTranchesDebuggingInfo(debugFile, data, metric); }

View File

@ -158,7 +158,7 @@ public class VariantDataManager {
logger.info( "Found " + numBadSitesAdded + " variants overlapping bad sites training tracks." );
// Next sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants
Collections.sort( data );
Collections.sort( data, new VariantDatum.VariantDatumLODComparator() );
final int numToAdd = Math.max( minimumNumber - trainingData.size(), Math.round((float)bottomPercentage * data.size()) );
if( numToAdd > data.size() ) {
throw new UserException.BadInput( "Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants argument but this is unsafe." );

View File

@ -27,13 +27,16 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.Serializable;
import java.util.Comparator;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Mar 4, 2011
*/
public class VariantDatum implements Comparable<VariantDatum> {
public class VariantDatum {
public double[] annotations;
public boolean[] isNull;
@ -52,8 +55,10 @@ public class VariantDatum implements Comparable<VariantDatum> {
public int worstAnnotation;
public MultivariateGaussian assignment; // used in K-means implementation
@Override
public int compareTo( final VariantDatum other ) {
return Double.compare(this.lod, other.lod);
public static class VariantDatumLODComparator implements Comparator<VariantDatum>, Serializable {
@Override
public int compare(final VariantDatum datum1, final VariantDatum datum2) {
return Double.compare(datum1.lod, datum2.lod);
}
}
}

View File

@ -55,22 +55,22 @@ public class Haplotype {
* @param bases bases
* @param qual qual
*/
public Haplotype(byte[] bases, int qual) {
this.bases = bases;
public Haplotype( final byte[] bases, final int qual ) {
this.bases = bases.clone();
quals = new double[bases.length];
Arrays.fill(quals, (double)qual);
}
public Haplotype(byte[] bases, double[] quals) {
this.bases = bases;
this.quals = quals;
public Haplotype( final byte[] bases, final double[] quals ) {
this.bases = bases.clone();
this.quals = quals.clone();
}
public Haplotype(byte[] bases) {
public Haplotype( final byte[] bases ) {
this(bases, 0);
}
public Haplotype(byte[] bases, GenomeLoc loc) {
public Haplotype( final byte[] bases, final GenomeLoc loc ) {
this(bases);
this.genomeLocation = loc;
}
@ -140,10 +140,10 @@ public class Haplotype {
}
public double[] getQuals() {
return quals;
return quals.clone();
}
public byte[] getBases() {
return bases;
return bases.clone();
}
public long getStartPosition() {

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.io.Serializable;
import java.util.Comparator;
import java.util.TreeSet;
@ -434,12 +435,14 @@ public class MannWhitneyU {
* A comparator class which uses dithering on tie-breaking to ensure that the internal treeset drops no values
* and to ensure that rank ties are broken at random.
*/
private class DitheringComparator implements Comparator<Pair<Number,USet>> {
private static class DitheringComparator implements Comparator<Pair<Number,USet>>, Serializable {
public DitheringComparator() {}
@Override
public boolean equals(Object other) { return false; }
@Override
public int compare(Pair<Number,USet> left, Pair<Number,USet> right) {
double comp = Double.compare(left.first.doubleValue(),right.first.doubleValue());
if ( comp > 0 ) { return 1; }

View File

@ -15,7 +15,7 @@ import java.util.ArrayList;
* Date: 1/4/12
*/
public class ActiveRegion implements HasGenomeLocation, Comparable<ActiveRegion> {
public class ActiveRegion implements HasGenomeLocation {
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
private final GenomeLoc activeRegionLoc;
@ -73,10 +73,6 @@ public class ActiveRegion implements HasGenomeLocation, Comparable<ActiveRegion>
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
}
@Override
public int compareTo( final ActiveRegion other ) {
return this.getLocation().compareTo(other.getLocation());
}
@Override
public GenomeLoc getLocation() { return activeRegionLoc; }
@ -97,4 +93,19 @@ public class ActiveRegion implements HasGenomeLocation, Comparable<ActiveRegion>
if ( extendedLoc.compareTo(other.extendedLoc) != 0 ) return false;
return true;
}
/**
* A comparator class which is used to sort ActiveRegions by their start location
*/
/*
public static class ActiveRegionStartLocationComparator implements Comparator<ActiveRegion> {
public ActiveRegionStartLocationComparator() {}
@Override
public int compare(final ActiveRegion left, final ActiveRegion right) {
return left.getLocation().compareTo(right.getLocation());
}
}
*/
}