Balancing training classes between SNP/Indel and TP/FP.

-- This results in a much more consistent distribution of LOD scores for SNPs and Indels.
-- Removing genotype summary stats since they are now produced by default.
-- Added functionality to specify certain subsets of the training data to be used in Tranche file generation, e.g. -good:tranche=true set.vcf
This commit is contained in:
Ryan Poplin 2014-03-12 10:19:16 -04:00
parent de2a2442d9
commit 7d11b4d5f1
1 changed files with 3 additions and 0 deletions

View File

@@ -1537,6 +1537,9 @@ public class MathUtils {
* @param N - the number of elements to draw
*/
public static <T> List<T> randomSample(final List<T> list, final int N) {
    if (!list.isEmpty()) {
        // Ask the helper for N indices over the list's current size, then
        // build the result by looking those indices up in the input list.
        final int populationSize = list.size();
        return sliceListByIndices(sampleIndicesWithReplacement(populationSize, N), list);
    }
    // An empty input has nothing to draw from: the very same list instance
    // is handed back, regardless of N, without touching the sampling helpers.
    return list;
}