Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-08-21 10:35:23 -04:00
commit d0644b3565
14 changed files with 78 additions and 48 deletions

View File

@ -2,6 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.Serializable;
import java.util.Comparator;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: ebanks * User: ebanks
@ -9,7 +12,7 @@ import org.jgrapht.graph.DefaultDirectedGraph;
*/ */
// simple edge class for connecting nodes in the graph // simple edge class for connecting nodes in the graph
public class DeBruijnEdge implements Comparable<DeBruijnEdge> { public class DeBruijnEdge {
private int multiplicity; private int multiplicity;
private boolean isRef; private boolean isRef;
@ -53,8 +56,10 @@ public class DeBruijnEdge implements Comparable<DeBruijnEdge> {
return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge))); return (graph.getEdgeSource(this).equals(graph2.getEdgeSource(edge))) && (graph.getEdgeTarget(this).equals(graph2.getEdgeTarget(edge)));
} }
public static class EdgeWeightComparator implements Comparator<DeBruijnEdge>, Serializable {
@Override @Override
public int compareTo( final DeBruijnEdge that ) { public int compare(final DeBruijnEdge edge1, final DeBruijnEdge edge2) {
return this.multiplicity - that.multiplicity; return edge1.multiplicity - edge2.multiplicity;
}
} }
} }

View File

@ -14,7 +14,7 @@ public class DeBruijnVertex {
public final int kmer; public final int kmer;
public DeBruijnVertex( final byte[] sequence, final int kmer ) { public DeBruijnVertex( final byte[] sequence, final int kmer ) {
this.sequence = sequence; this.sequence = sequence.clone();
this.kmer = kmer; this.kmer = kmer;
} }
@ -37,7 +37,7 @@ public class DeBruijnVertex {
} }
public byte[] getSequence() { public byte[] getSequence() {
return sequence; return sequence.clone();
} }
public byte[] getSuffix() { public byte[] getSuffix() {

View File

@ -4,6 +4,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.Serializable;
import java.util.*; import java.util.*;
/** /**
@ -76,13 +77,15 @@ public class KBestPaths {
} }
} }
protected static class PathComparatorTotalScore implements Comparator<Path> { protected static class PathComparatorTotalScore implements Comparator<Path>, Serializable {
@Override
public int compare(final Path path1, final Path path2) { public int compare(final Path path1, final Path path2) {
return path1.totalScore - path2.totalScore; return path1.totalScore - path2.totalScore;
} }
} }
//protected static class PathComparatorLowestEdge implements Comparator<Path> { //protected static class PathComparatorLowestEdge implements Comparator<Path>, Serializable {
// @Override
// public int compare(final Path path1, final Path path2) { // public int compare(final Path path1, final Path path2) {
// return path2.lowestEdge - path1.lowestEdge; // return path2.lowestEdge - path1.lowestEdge;
// } // }
@ -124,7 +127,7 @@ public class KBestPaths {
// recursively run DFS // recursively run DFS
final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>(); final ArrayList<DeBruijnEdge> edgeArrayList = new ArrayList<DeBruijnEdge>();
edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex)); edgeArrayList.addAll(graph.outgoingEdgesOf(path.lastVertex));
Collections.sort(edgeArrayList); Collections.sort(edgeArrayList, new DeBruijnEdge.EdgeWeightComparator());
Collections.reverse(edgeArrayList); Collections.reverse(edgeArrayList);
for ( final DeBruijnEdge edge : edgeArrayList ) { for ( final DeBruijnEdge edge : edgeArrayList ) {
// make sure the edge is not already in the path // make sure the edge is not already in the path

View File

@ -78,10 +78,10 @@ public class LikelihoodCalculationEngine {
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH); PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
// for each sample's reads // for each sample's reads
for( final String sample : perSampleReadList.keySet() ) { for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
//if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); } //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
// evaluate the likelihood of the reads given those haplotypes // evaluate the likelihood of the reads given those haplotypes
computeReadLikelihoods( haplotypes, perSampleReadList.get(sample), sample, matchMetricArray, XMetricArray, YMetricArray ); computeReadLikelihoods( haplotypes, sampleEntry.getValue(), sampleEntry.getKey(), matchMetricArray, XMetricArray, YMetricArray );
} }
} }

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile; import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -31,7 +32,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
*/ */
protected final static Logger logger = Logger.getLogger(TraversalEngine.class); protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion> workQueue = new LinkedList<org.broadinstitute.sting.utils.activeregion.ActiveRegion>(); private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>(); private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
@Override @Override
@ -110,18 +111,18 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// add these blocks of work to the work queue // add these blocks of work to the work queue
// band-pass filter the list of isActive probabilities and turn into active regions // band-pass filter the list of isActive probabilities and turn into active regions
final ActivityProfile bandPassFiltered = profile.bandPassFilter(); final ActivityProfile bandPassFiltered = profile.bandPassFilter();
final List<org.broadinstitute.sting.utils.activeregion.ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ); final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
// add active regions to queue of regions to process // add active regions to queue of regions to process
// first check if can merge active regions over shard boundaries // first check if can merge active regions over shard boundaries
if( !activeRegions.isEmpty() ) { if( !activeRegions.isEmpty() ) {
if( !workQueue.isEmpty() ) { if( !workQueue.isEmpty() ) {
final org.broadinstitute.sting.utils.activeregion.ActiveRegion last = workQueue.getLast(); final ActiveRegion last = workQueue.getLast();
final org.broadinstitute.sting.utils.activeregion.ActiveRegion first = activeRegions.get(0); final ActiveRegion first = activeRegions.get(0);
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
workQueue.removeLast(); workQueue.removeLast();
activeRegions.remove(first); activeRegions.remove(first);
workQueue.add( new org.broadinstitute.sting.utils.activeregion.ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
} }
} }
workQueue.addAll( activeRegions ); workQueue.addAll( activeRegions );

View File

@ -48,6 +48,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.Serializable;
import java.util.*; import java.util.*;
/** /**
@ -111,7 +112,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot
return map; return map;
} }
private static class HaplotypeComparator implements Comparator<Haplotype> { private static class HaplotypeComparator implements Comparator<Haplotype>, Serializable {
public int compare(Haplotype a, Haplotype b) { public int compare(Haplotype a, Haplotype b) {
if (a.getQualitySum() < b.getQualitySum()) if (a.getQualitySum() < b.getQualitySum())

View File

@ -42,7 +42,7 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
protected RecalibrationTables recalibrationTables; protected RecalibrationTables recalibrationTables;
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
this.covariates = covariates; this.covariates = covariates.clone();
this.recalibrationTables = recalibrationTables; this.recalibrationTables = recalibrationTables;
} }

View File

@ -29,10 +29,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.ByteArrayOutputStream; import java.io.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*; import java.util.*;
/** /**
@ -41,7 +38,7 @@ import java.util.*;
* Date: Mar 10, 2011 * Date: Mar 10, 2011
*/ */
public class Tranche implements Comparable<Tranche> { public class Tranche {
private static final int CURRENT_VERSION = 5; private static final int CURRENT_VERSION = 5;
public double ts, minVQSLod, knownTiTv, novelTiTv; public double ts, minVQSLod, knownTiTv, novelTiTv;
@ -83,10 +80,14 @@ public class Tranche implements Comparable<Tranche> {
return accessibleTruthSites > 0 ? callsAtTruthSites / (1.0*accessibleTruthSites) : 0.0; return accessibleTruthSites > 0 ? callsAtTruthSites / (1.0*accessibleTruthSites) : 0.0;
} }
public int compareTo(Tranche other) { public static class TrancheTruthSensitivityComparator implements Comparator<Tranche>, Serializable {
return Double.compare(this.ts, other.ts); @Override
public int compare(final Tranche tranche1, final Tranche tranche2) {
return Double.compare(tranche1.ts, tranche2.ts);
}
} }
@Override
public String toString() { public String toString() {
return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.4f) novel=(%d @ %.4f) truthSites(%d accessible, %d called), name=%s]", return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.4f) novel=(%d @ %.4f) truthSites(%d accessible, %d called), name=%s]",
ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, name); ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, name);
@ -102,7 +103,7 @@ public class Tranche implements Comparable<Tranche> {
final ByteArrayOutputStream bytes = new ByteArrayOutputStream(); final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final PrintStream stream = new PrintStream(bytes); final PrintStream stream = new PrintStream(bytes);
Collections.sort(tranches); Collections.sort( tranches, new TrancheTruthSensitivityComparator() );
stream.println("# Variant quality score tranches file"); stream.println("# Variant quality score tranches file");
stream.println("# Version number " + CURRENT_VERSION); stream.println("# Version number " + CURRENT_VERSION);
@ -183,7 +184,7 @@ public class Tranche implements Comparable<Tranche> {
} }
} }
Collections.sort(tranches); Collections.sort( tranches, new TrancheTruthSensitivityComparator() );
return tranches; return tranches;
} catch( FileNotFoundException e ) { } catch( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(f, e); throw new UserException.CouldNotReadInputFile(f, e);

View File

@ -146,7 +146,7 @@ public class TrancheManager {
public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) { public static List<Tranche> findTranches( final ArrayList<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size())); logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));
Collections.sort(data); Collections.sort( data, new VariantDatum.VariantDatumLODComparator() );
metric.calculateRunningMetric(data); metric.calculateRunningMetric(data);
if ( debugFile != null) { writeTranchesDebuggingInfo(debugFile, data, metric); } if ( debugFile != null) { writeTranchesDebuggingInfo(debugFile, data, metric); }

View File

@ -158,7 +158,7 @@ public class VariantDataManager {
logger.info( "Found " + numBadSitesAdded + " variants overlapping bad sites training tracks." ); logger.info( "Found " + numBadSitesAdded + " variants overlapping bad sites training tracks." );
// Next sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants // Next sort the variants by the LOD coming from the positive model and add to the list the bottom X percent of variants
Collections.sort( data ); Collections.sort( data, new VariantDatum.VariantDatumLODComparator() );
final int numToAdd = Math.max( minimumNumber - trainingData.size(), Math.round((float)bottomPercentage * data.size()) ); final int numToAdd = Math.max( minimumNumber - trainingData.size(), Math.round((float)bottomPercentage * data.size()) );
if( numToAdd > data.size() ) { if( numToAdd > data.size() ) {
throw new UserException.BadInput( "Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants arugment but this is unsafe." ); throw new UserException.BadInput( "Error during negative model training. Minimum number of variants to use in training is larger than the whole call set. One can attempt to lower the --minNumBadVariants arugment but this is unsafe." );

View File

@ -27,13 +27,16 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.Serializable;
import java.util.Comparator;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: rpoplin * User: rpoplin
* Date: Mar 4, 2011 * Date: Mar 4, 2011
*/ */
public class VariantDatum implements Comparable<VariantDatum> { public class VariantDatum {
public double[] annotations; public double[] annotations;
public boolean[] isNull; public boolean[] isNull;
@ -52,8 +55,10 @@ public class VariantDatum implements Comparable<VariantDatum> {
public int worstAnnotation; public int worstAnnotation;
public MultivariateGaussian assignment; // used in K-means implementation public MultivariateGaussian assignment; // used in K-means implementation
public static class VariantDatumLODComparator implements Comparator<VariantDatum>, Serializable {
@Override @Override
public int compareTo( final VariantDatum other ) { public int compare(final VariantDatum datum1, final VariantDatum datum2) {
return Double.compare(this.lod, other.lod); return Double.compare(datum1.lod, datum2.lod);
}
} }
} }

View File

@ -55,22 +55,22 @@ public class Haplotype {
* @param bases bases * @param bases bases
* @param qual qual * @param qual qual
*/ */
public Haplotype(byte[] bases, int qual) { public Haplotype( final byte[] bases, final int qual ) {
this.bases = bases; this.bases = bases.clone();
quals = new double[bases.length]; quals = new double[bases.length];
Arrays.fill(quals, (double)qual); Arrays.fill(quals, (double)qual);
} }
public Haplotype(byte[] bases, double[] quals) { public Haplotype( final byte[] bases, final double[] quals ) {
this.bases = bases; this.bases = bases.clone();
this.quals = quals; this.quals = quals.clone();
} }
public Haplotype(byte[] bases) { public Haplotype( final byte[] bases ) {
this(bases, 0); this(bases, 0);
} }
public Haplotype(byte[] bases, GenomeLoc loc) { public Haplotype( final byte[] bases, final GenomeLoc loc ) {
this(bases); this(bases);
this.genomeLocation = loc; this.genomeLocation = loc;
} }
@ -140,10 +140,10 @@ public class Haplotype {
} }
public double[] getQuals() { public double[] getQuals() {
return quals; return quals.clone();
} }
public byte[] getBases() { public byte[] getBases() {
return bases; return bases.clone();
} }
public long getStartPosition() { public long getStartPosition() {

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import java.io.Serializable;
import java.util.Comparator; import java.util.Comparator;
import java.util.TreeSet; import java.util.TreeSet;
@ -434,12 +435,14 @@ public class MannWhitneyU {
* A comparator class which uses dithering on tie-breaking to ensure that the internal treeset drops no values * A comparator class which uses dithering on tie-breaking to ensure that the internal treeset drops no values
* and to ensure that rank ties are broken at random. * and to ensure that rank ties are broken at random.
*/ */
private class DitheringComparator implements Comparator<Pair<Number,USet>> { private static class DitheringComparator implements Comparator<Pair<Number,USet>>, Serializable {
public DitheringComparator() {} public DitheringComparator() {}
@Override
public boolean equals(Object other) { return false; } public boolean equals(Object other) { return false; }
@Override
public int compare(Pair<Number,USet> left, Pair<Number,USet> right) { public int compare(Pair<Number,USet> left, Pair<Number,USet> right) {
double comp = Double.compare(left.first.doubleValue(),right.first.doubleValue()); double comp = Double.compare(left.first.doubleValue(),right.first.doubleValue());
if ( comp > 0 ) { return 1; } if ( comp > 0 ) { return 1; }

View File

@ -15,7 +15,7 @@ import java.util.ArrayList;
* Date: 1/4/12 * Date: 1/4/12
*/ */
public class ActiveRegion implements HasGenomeLocation, Comparable<ActiveRegion> { public class ActiveRegion implements HasGenomeLocation {
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(); private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
private final GenomeLoc activeRegionLoc; private final GenomeLoc activeRegionLoc;
@ -73,10 +73,6 @@ public class ActiveRegion implements HasGenomeLocation, Comparable<ActiveRegion>
Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases(); Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases();
} }
@Override
public int compareTo( final ActiveRegion other ) {
return this.getLocation().compareTo(other.getLocation());
}
@Override @Override
public GenomeLoc getLocation() { return activeRegionLoc; } public GenomeLoc getLocation() { return activeRegionLoc; }
@ -97,4 +93,19 @@ public class ActiveRegion implements HasGenomeLocation, Comparable<ActiveRegion>
if ( extendedLoc.compareTo(other.extendedLoc) != 0 ) return false; if ( extendedLoc.compareTo(other.extendedLoc) != 0 ) return false;
return true; return true;
} }
/**
* A comparator class which is used to sort ActiveRegions by their start location
*/
/*
public static class ActiveRegionStartLocationComparator implements Comparator<ActiveRegion> {
public ActiveRegionStartLocationComparator() {}
@Override
public int compare(final ActiveRegion left, final ActiveRegion right) {
return left.getLocation().compareTo(right.getLocation());
}
}
*/
} }