Merge branch 'master' of ssh://gsa2/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2012-09-20 14:14:58 -04:00
commit 747694f7c2
3 changed files with 58 additions and 5 deletions

View File

@ -37,6 +37,28 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
@ArgumentCollection @ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
/**
* The metaData file can take two formats, the first of which is the first 6 lines of the standard ped file. This
* is what Plink describes as a fam file. An example fam file is (note that there is no header):
*
* CEUTrio NA12878 NA12891 NA12892 2 -9
* CEUTrio NA12891 UNKN1 UNKN2 2 -9
* CEUTrio NA12892 UNKN3 UNKN4 1 -9
*
* where the entries are (FamilyID IndividualID DadID MomID Phenotype Sex)
*
* An alternate format is a two-column key-value file
*
* NA12878 fid=CEUTrio;dad=NA12891;mom=NA12892;sex=2;phenotype=-9
* NA12891 fid=CEUTrio;sex=2;phenotype=-9
* NA12892 fid=CEUTrio;sex=1;phenotype=-9
*
* wherein unknown parents needn't be specified. The columns are the individual ID, and a list of key-value pairs.
*
* Regardless of which file is specified, the walker will output a .fam file alongside the bed file. If the
* command line has "-md [name].fam", the fam file will simply be copied. However, if a metadata file of the
* alternate format is passed by "-md [name].txt", the walker will construct a formatted .fam file from the data.
*/
@Input(shortName="m",fullName = "metaData",required=true,doc="Sample metadata file. You may specify a .fam file " + @Input(shortName="m",fullName = "metaData",required=true,doc="Sample metadata file. You may specify a .fam file " +
"(in which case it will be copied to the file you provide as fam output).") "(in which case it will be copied to the file you provide as fam output).")
File metaDataFile; File metaDataFile;

View File

@ -30,16 +30,26 @@ public class MannWhitneyU {
private int sizeSet2; private int sizeSet2;
private ExactMode exactMode; private ExactMode exactMode;
public MannWhitneyU() { public MannWhitneyU(ExactMode mode, boolean dither) {
observations = new TreeSet<Pair<Number,USet>>(new DitheringComparator()); if ( dither )
observations = new TreeSet<Pair<Number,USet>>(new DitheringComparator());
else
observations = new TreeSet<Pair<Number,USet>>(new NumberedPairComparator());
sizeSet1 = 0; sizeSet1 = 0;
sizeSet2 = 0; sizeSet2 = 0;
exactMode = ExactMode.POINT; exactMode = mode;
}
public MannWhitneyU() {
this(ExactMode.POINT,true);
}
public MannWhitneyU(boolean dither) {
this(ExactMode.POINT,dither);
} }
public MannWhitneyU(ExactMode mode) { public MannWhitneyU(ExactMode mode) {
super(); this(mode,true);
exactMode = mode;
} }
/** /**
@ -451,6 +461,22 @@ public class MannWhitneyU {
} }
} }
/**
* A comparator that reaches into the pair and compares numbers without tie-braking.
*/
private static class NumberedPairComparator implements Comparator<Pair<Number,USet>>, Serializable {
public NumberedPairComparator() {}
@Override
public boolean equals(Object other) { return false; }
@Override
public int compare(Pair<Number,USet> left, Pair<Number,USet> right ) {
return Double.compare(left.first.doubleValue(),right.first.doubleValue());
}
}
public enum USet { SET1, SET2 } public enum USet { SET1, SET2 }
public enum ExactMode { POINT, CUMULATIVE } public enum ExactMode { POINT, CUMULATIVE }

View File

@ -40,12 +40,15 @@ public class MWUnitTest extends BaseTest {
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),11L); Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu.getObservations(),MannWhitneyU.USet.SET2),11L);
MannWhitneyU mwu2 = new MannWhitneyU(); MannWhitneyU mwu2 = new MannWhitneyU();
MannWhitneyU mwuNoDither = new MannWhitneyU(false);
for ( int dp : new int[]{2,4,5,6,8} ) { for ( int dp : new int[]{2,4,5,6,8} ) {
mwu2.add(dp,MannWhitneyU.USet.SET1); mwu2.add(dp,MannWhitneyU.USet.SET1);
mwuNoDither.add(dp,MannWhitneyU.USet.SET1);
} }
for ( int dp : new int[]{1,3,7,9,10,11,12,13} ) { for ( int dp : new int[]{1,3,7,9,10,11,12,13} ) {
mwu2.add(dp,MannWhitneyU.USet.SET2); mwu2.add(dp,MannWhitneyU.USet.SET2);
mwuNoDither.add(dp,MannWhitneyU.USet.SET2);
} }
MannWhitneyU.ExactMode pm = MannWhitneyU.ExactMode.POINT; MannWhitneyU.ExactMode pm = MannWhitneyU.ExactMode.POINT;
@ -54,6 +57,8 @@ public class MWUnitTest extends BaseTest {
// tests using the hypothesis that set 2 dominates set 1 (U value = 10) // tests using the hypothesis that set 2 dominates set 1 (U value = 10)
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET1),10L); Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET1),10L);
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET2),30L); Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET2),30L);
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwuNoDither.getObservations(),MannWhitneyU.USet.SET1),10L);
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwuNoDither.getObservations(),MannWhitneyU.USet.SET2),30L);
Pair<Integer,Integer> sizes = mwu2.getSetSizes(); Pair<Integer,Integer> sizes = mwu2.getSetSizes();