diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java index 6bc6153df..37fc96681 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java @@ -37,6 +37,28 @@ public class VariantsToBinaryPed extends RodWalker { @ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + /** + * The metaData file can take two formats, the first of which is the first 6 lines of the standard ped file. This + * is what Plink describes as a fam file. An example fam file is (note that there is no header): + * + * CEUTrio NA12878 NA12891 NA12892 2 -9 + * CEUTrio NA12891 UNKN1 UNKN2 2 -9 + * CEUTrio NA12892 UNKN3 UNKN4 1 -9 + * + * where the entries are (FamilyID IndividualID DadID MomID Phenotype Sex) + * + * An alternate format is a two-column key-value file + * + * NA12878 fid=CEUTrio;dad=NA12891;mom=NA12892;sex=2;phenotype=-9 + * NA12891 fid=CEUTrio;sex=2;phenotype=-9 + * NA12892 fid=CEUTrio;sex=1;phenotype=-9 + * + * wherein unknown parents needn't be specified. The columns are the individual ID, and a list of key-value pairs. + * + * Regardless of which file is specified, the walker will output a .fam file alongside the bed file. If the + * command line has "-md [name].fam", the fam file will simply be copied. However, if a metadata file of the + * alternate format is passed by "-md [name].txt", the walker will construct a formatted .fam file from the data. + */ @Input(shortName="m",fullName = "metaData",required=true,doc="Sample metadata file. You may specify a .fam file " + "(in which case it will be copied to the file you provide as fam output).") File metaDataFile; diff --git a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index 8339e38c9..601f90b4d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -30,16 +30,26 @@ public class MannWhitneyU { private int sizeSet2; private ExactMode exactMode; - public MannWhitneyU() { - observations = new TreeSet>(new DitheringComparator()); + public MannWhitneyU(ExactMode mode, boolean dither) { + if ( dither ) + observations = new TreeSet>(new DitheringComparator()); + else + observations = new TreeSet>(new NumberedPairComparator()); sizeSet1 = 0; sizeSet2 = 0; - exactMode = ExactMode.POINT; + exactMode = mode; + } + + public MannWhitneyU() { + this(ExactMode.POINT,true); + } + + public MannWhitneyU(boolean dither) { + this(ExactMode.POINT,dither); } public MannWhitneyU(ExactMode mode) { - super(); - exactMode = mode; + this(mode,true); } /** @@ -451,6 +461,22 @@ public class MannWhitneyU { } } + /** + * A comparator that reaches into the pair and compares numbers without tie-braking. + */ + private static class NumberedPairComparator implements Comparator>, Serializable { + + public NumberedPairComparator() {} + + @Override + public boolean equals(Object other) { return false; } + + @Override + public int compare(Pair left, Pair right ) { + return Double.compare(left.first.doubleValue(),right.first.doubleValue()); + } + } + public enum USet { SET1, SET2 } public enum ExactMode { POINT, CUMULATIVE } diff --git a/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java index 6a01bb0b4..edd1bc356 100755 --- a/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java @@ -40,12 +40,15 @@ public class MWUnitTest extends BaseTest { Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),11L); MannWhitneyU mwu2 = new MannWhitneyU(); + MannWhitneyU mwuNoDither = new MannWhitneyU(false); for ( int dp : new int[]{2,4,5,6,8} ) { mwu2.add(dp,MannWhitneyU.USet.SET1); + mwuNoDither.add(dp,MannWhitneyU.USet.SET1); } for ( int dp : new int[]{1,3,7,9,10,11,12,13} ) { mwu2.add(dp,MannWhitneyU.USet.SET2); + mwuNoDither.add(dp,MannWhitneyU.USet.SET2); } MannWhitneyU.ExactMode pm = MannWhitneyU.ExactMode.POINT; @@ -54,6 +57,8 @@ public class MWUnitTest extends BaseTest { // tests using the hypothesis that set 2 dominates set 1 (U value = 10) Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET1),10L); Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET2),30L); + Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwuNoDither.getObservations(),MannWhitneyU.USet.SET1),10L); + Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwuNoDither.getObservations(),MannWhitneyU.USet.SET2),30L); Pair sizes = mwu2.getSetSizes();