Reorganization of Sample

-- Moved Gender and Affection to separate public enums
-- PedReader 90% implemented
-- Improved interface cleanup for XReadLines and UserException
This commit is contained in:
Mark DePristo 2011-09-30 15:50:54 -04:00
parent c1cf6bc45a
commit 84160bd83f
12 changed files with 386 additions and 136 deletions

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * Categorical trait status of a sample, used for association and analysis.
 *
 * A sample's status with respect to the categorical trait may be unknown,
 * affected, or unaffected. Alternatively the sample may be flagged as
 * carrying a quantitative trait, in which case the numeric value is kept
 * in an associated value in the Sample class.
 *
 * @author Mark DePristo
 * @since Sept. 2011
 */
public enum Affection {
    /** Trait status has not been determined */
    UNKNOWN,

    /** The sample suffers from the disease */
    AFFECTED,

    /** The sample is not affected by the disease */
    UNAFFECTED,

    /** The trait is quantitative; its value is stored elsewhere */
    QUANTITATIVE
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * The possible genders of a sample: male, female, or unknown.
 */
public enum Gender {
    /** The sample is male */
    MALE,

    /** The sample is female */
    FEMALE,

    /** The gender of the sample is not known */
    UNKNOWN
}

View File

@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.Reader;
import java.util.*;
/**
@ -115,10 +117,6 @@ public class PedReader {
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
final static private String commentMarker = "#";
private final File source;
private final List<PedRecord> records;
public enum MissingPedFields {
NO_FAMILY_ID,
NO_PARENTS,
@ -127,8 +125,8 @@ public class PedReader {
}
// phenotype
private final static String PHENOTYPE_MISSING_VALUE = "-9";
private final static String PHENOTYPE_MISSING_VALUE_SECONDARY = "0";
private final static String MISSING_VALUE1 = "-9";
private final static String MISSING_VALUE2 = "0";
private final static String PHENOTYPE_UNAFFECTED = "1";
private final static String PHENOTYPE_AFFECTED = "2";
@ -137,14 +135,15 @@ public class PedReader {
private final static String SEX_FEMALE = "2";
// other=unknown
public PedReader(File source, EnumSet<MissingPedFields> missingFields) throws FileNotFoundException {
this.source = source;
List<String> lines = new XReadLines(source).readLines();
this.records = parsePedLines(lines, missingFields);
/** Creates a PedReader; the constructor body is empty, input is supplied to the parse methods. */
public PedReader() { }
/**
 * Parses a PED file from disk: logs the file and the missing-field set,
 * opens a FileReader on the file, and hands it to the Reader-based parse.
 *
 * @param source        the PED file to read
 * @param missingFields the PED fields declared to be absent from the file
 * @param sampleDB      the sample database passed through to the Reader-based parse
 * @return whatever the Reader-based parse returns for this file
 * @throws FileNotFoundException if a FileReader cannot be opened on source
 */
public final List<Sample> parse(File source, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) throws FileNotFoundException {
    logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
    final Reader pedReader = new FileReader(source);
    return parse(pedReader, missingFields, sampleDB);
}
private final List<PedRecord> parsePedLines(final List<String> lines, EnumSet<MissingPedFields> missingFields) {
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
public final List<Sample> parse(Reader reader, EnumSet<MissingPedFields> missingFields, SampleDataSource sampleDB) {
final List<String> lines = new XReadLines(reader).readLines();
// What are the record offsets?
final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0;
@ -153,7 +152,7 @@ public class PedReader {
final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1;
final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos));
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
// go through once and determine properties
int lineNo = 1;
@ -164,7 +163,7 @@ public class PedReader {
String[] parts = line.split("\\W+");
if ( parts.length != nExpectedFields )
throw new UserException.MalformedFile(source, "Bad PED line " + lineNo + ": wrong number of fields");
throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");
if ( phenotypePos != -1 ) {
isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
@ -177,75 +176,55 @@ public class PedReader {
// now go through and parse each record
lineNo = 1;
final List<PedRecord> recs = new ArrayList<PedRecord>(splits.size());
final List<Sample> samples = new ArrayList<Sample>(splits.size());
for ( final String[] parts : splits ) {
String familyID = null, individualID, paternalID = null, maternalID = null;
Sample.Gender sex = Sample.Gender.UNKNOWN;
double quantitativePhenotype = Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE;
Sample.Affection affection = Sample.Affection.UNKNOWN;
Gender sex = Gender.UNKNOWN;
double quantitativePhenotype = Sample.UNSET_QT;
Affection affection = Affection.UNKNOWN;
if ( familyPos != -1 ) familyID = parts[familyPos];
if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
individualID = parts[samplePos];
if ( paternalPos != -1 ) paternalID = parts[paternalPos];
if ( maternalPos != -1 ) maternalID = parts[maternalPos];
if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);
if ( sexPos != -1 ) {
if ( parts[sexPos].equals(SEX_MALE) ) sex = Sample.Gender.MALE;
else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Sample.Gender.FEMALE;
else sex = Sample.Gender.UNKNOWN;
if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
else sex = Gender.UNKNOWN;
}
if ( phenotypePos != -1 ) {
if ( isQT ) {
if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) )
affection = Sample.Affection.UNKNOWN;
if ( parts[phenotypePos].equals(MISSING_VALUE1) )
affection = Affection.UNKNOWN;
else {
affection = Sample.Affection.QUANTITATIVE;
affection = Affection.QUANTITATIVE;
quantitativePhenotype = Double.valueOf(parts[phenotypePos]);
}
} else {
if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) ) affection = Sample.Affection.UNKNOWN;
else if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE_SECONDARY) ) affection = Sample.Affection.UNKNOWN;
else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Sample.Affection.UNAFFECTED;
else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Sample.Affection.AFFECTED;
if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
}
}
recs.add(new PedRecord(familyID, individualID, paternalID, maternalID, sex, quantitativePhenotype, affection));
final Sample s = new Sample(familyID, sampleDB, individualID, paternalID, maternalID, sex, affection, quantitativePhenotype);
samples.add(s);
sampleDB.addSample(s);
lineNo++;
}
return Collections.unmodifiableList(recs);
sampleDB.validate(samples);
return samples;
}
public List<PedRecord> getRecords() {
return records;
}
public void fillSampleDB(SampleDataSource db) {
for ( final PedRecord rec : getRecords() ) {
}
}
}
class PedRecord {
final String familyID, individualID, paternalID, maternalID;
final Sample.Gender sex;
final double quantitativePhenotype;
final Sample.Affection affection;
PedRecord(final String familyID, final String individualID,
final String paternalID, final String maternalID,
final Sample.Gender sex,
final double quantitativePhenotype, final Sample.Affection affection) {
this.familyID = familyID;
this.individualID = individualID;
this.paternalID = paternalID;
this.maternalID = maternalID;
this.sex = sex;
this.quantitativePhenotype = quantitativePhenotype;
this.affection = affection;
/**
 * Converts a PED missing-value marker into null.
 *
 * @param string the raw field text
 * @return null when string equals MISSING_VALUE1 or MISSING_VALUE2,
 *         otherwise string itself
 */
private final static String maybeMissing(final String string) {
    final boolean isMissing = string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2);
    return isMissing ? null : string;
}
}

View File

@ -10,37 +10,18 @@ import java.util.Map;
*/
public class Sample implements java.io.Serializable {
final private String familyID, paternalID, maternalID;
final private Sample.Gender gender;
final private Gender gender;
final private double quantitativePhenotype;
final private Sample.Affection affection;
final private Affection affection;
final private String ID;
final private SampleDataSource dataSource;
final private Map<String, Object> properties = new HashMap<String, Object>();
// todo -- conditionally add the property map -- should be empty by default
private final Map<String, Object> properties = new HashMap<String, Object>();
public enum Gender {
MALE,
FEMALE,
UNKNOWN
}
public enum Affection {
/** Status is unknown */
UNKNOWN,
/** Suffers from the disease */
AFFECTED,
/** Unaffected by the disease */
UNAFFECTED,
/** A quantitative trait: value of the trait is stored elsewhere */
QUANTITATIVE
}
public final static double UNSET_QUANTITIATIVE_TRAIT_VALUE = Double.NaN;
public final static double UNSET_QT = Double.NaN;
public Sample(final String ID, final SampleDataSource dataSource,
final String familyID, final String paternalID, final String maternalID,
final Gender gender, final double quantitativePhenotype, final Affection affection) {
final Gender gender, final Affection affection, final double quantitativePhenotype) {
this.familyID = familyID;
this.paternalID = paternalID;
this.maternalID = maternalID;
@ -51,20 +32,31 @@ public class Sample implements java.io.Serializable {
this.dataSource = dataSource;
}
public Sample(final String ID, final SampleDataSource dataSource,
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
this(ID, dataSource, familyID, paternalID, maternalID, gender,
UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN);
/**
 * Creates a sample that has no data source: forwards every argument to the
 * full constructor and passes null for the SampleDataSource.
 */
protected Sample(final String ID,
final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection, final double quantitativePhenotype) {
this(ID, null, familyID, paternalID, maternalID, gender, affection, quantitativePhenotype);
}
public Sample(final String ID, final SampleDataSource dataSource, final double quantitativePhenotype, final Affection affection) {
this(ID, dataSource, null, null, null, Gender.UNKNOWN, quantitativePhenotype, affection);
/**
 * Creates a sample that has no data source and no quantitative trait value:
 * forwards to the full constructor with a null SampleDataSource and
 * UNSET_QT as the quantitative phenotype.
 */
protected Sample(final String ID,
final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection) {
this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
}
/**
 * Creates a sample with family information but no phenotype: forwards to
 * the full constructor with Affection.UNKNOWN and UNSET_QT.
 */
public Sample(final String ID, final SampleDataSource dataSource,
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
this(ID, dataSource, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
}
/**
 * Creates a sample with phenotype information only: forwards to the full
 * constructor with null family, paternal and maternal IDs and Gender.UNKNOWN.
 */
public Sample(final String ID, final SampleDataSource dataSource, final Affection affection, final double quantitativePhenotype) {
this(ID, dataSource, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype);
}
public Sample(String id, SampleDataSource dataSource) {
this(id, dataSource,
null, null, null,
Gender.UNKNOWN, UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN);
this(id, dataSource, null, null, null,
Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
}
// -------------------------------------------------------------------------------------
@ -77,7 +69,6 @@ public class Sample implements java.io.Serializable {
return ID;
}
/**
 * @return the family ID this sample was constructed with (null is passed by
 *         the constructors that take no family information)
 */
public String getFamilyID() {
return familyID;
}
@ -157,21 +148,4 @@ public class Sample implements java.io.Serializable {
/**
 * @param key the property name to look up
 * @return true if the properties map contains an entry for key
 */
public boolean hasExtraProperty(String key) {
return properties.containsKey(key);
}
// @Override
// public boolean equals(Object o) {
// if (this == o) return true;
// if (o == null || getClass() != o.getClass()) return false;
//
// Sample sample = (Sample) o;
// if (ID != null ? !ID.equals(sample.ID) : sample.ID != null) return false;
// if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
//
// return true;
// }
//
// @Override
// public int hashCode() {
// return ID != null ? ID.hashCode() : "".hashCode();
// }
}

View File

@ -55,7 +55,7 @@ public class SampleDataSource {
/**
* Hallucinates sample objects for all the samples in the SAM file and stores them
*/
public SampleDataSource addSamples(SAMFileHeader header) {
protected SampleDataSource addSamples(SAMFileHeader header) {
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (getSample(sampleName) == null) {
Sample newSample = new Sample(sampleName, this);
@ -65,7 +65,7 @@ public class SampleDataSource {
return this;
}
public SampleDataSource addSamples(final List<File> sampleFiles) {
protected SampleDataSource addSamples(final List<File> sampleFiles) {
// add files consecutively
for (File file : sampleFiles) {
addSamples(file);
@ -77,7 +77,7 @@ public class SampleDataSource {
* Parse one sample file and integrate it with samples that are already there
* Fail quickly if we find any errors in the file
*/
public SampleDataSource addSamples(File sampleFile) {
protected SampleDataSource addSamples(File sampleFile) {
return this;
}
@ -85,7 +85,7 @@ public class SampleDataSource {
* Add a sample to the collection
* @param sample to be added
*/
private SampleDataSource addSample(Sample sample) {
protected SampleDataSource addSample(Sample sample) {
samples.put(sample.getID(), sample);
return this;
}
@ -138,8 +138,6 @@ public class SampleDataSource {
//
// --------------------------------------------------------------------------------
/**
* Get number of sample objects
* @return size of samples map
@ -209,4 +207,18 @@ public class SampleDataSource {
}
return samples;
}
// --------------------------------------------------------------------------------
//
// Validation
//
// --------------------------------------------------------------------------------
/**
 * Validates every sample returned by getSamples() by delegating to
 * validate(Collection).
 */
public final void validate() {
validate(getSamples());
}
/**
 * Validates the given samples.
 *
 * NOTE(review): the body is empty, so no checks are performed yet.
 *
 * @param samplesToCheck the samples to validate
 */
public final void validate(Collection<Sample> samplesToCheck) {
}
}

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -248,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
for ( String sample : samples ) {
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Sample.Gender.MALE;
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
Genotype genotype;
boolean isValidation;

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -41,7 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
Sample sample = getSampleDB().getSample(read);
return sample.getGender() == Sample.Gender.MALE ? 1 : 0;
return sample.getGender() == Gender.MALE ? 1 : 0;
}
/** Returns 0. NOTE(review): presumably the starting value for the reduce step -- confirm against ReadWalker. */
public Integer reduceInit() { return 0; }

View File

@ -215,6 +215,10 @@ public class UserException extends ReviewedStingException {
super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage()));
}
/**
 * Reports a malformed input identified by a name rather than a File object.
 *
 * @param name    the name inserted into the error text to identify the input
 * @param message description of what is wrong with the input
 */
public MalformedFile(String name, String message) {
super(String.format("File associated with name %s is malformed: %s", name, message));
}
/**
 * Reports a malformed input identified by a name, including the message of
 * the exception that triggered the failure.
 *
 * @param name    the name inserted into the error text to identify the input
 * @param message description of what is wrong with the input
 * @param e       the triggering exception; only e.getMessage() is included in the text
 */
public MalformedFile(String name, String message, Exception e) {
super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage()));
}

View File

@ -99,9 +99,9 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
*
* @param reader
*/
public XReadLines(final BufferedReader reader, final boolean trimWhitespace) {
public XReadLines(final Reader reader, final boolean trimWhitespace) {
try {
this.in = reader;
this.in = new BufferedReader(reader);
nextline = readNextLine();
this.trimWhitespace = trimWhitespace;
} catch(IOException e) {
@ -109,7 +109,7 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
}
}
public XReadLines(final BufferedReader reader) throws FileNotFoundException {
public XReadLines(final Reader reader) {
this(reader, true);
}

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections;
import java.util.Iterator;
@ -85,8 +86,6 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
(byte)0);
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
SampleDataSource sampleDataSource = new SampleDataSource().addSamples(reader.getFileHeader());
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());

View File

@ -0,0 +1,201 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
/**
 * UnitTest for PedReader
 *
 * Every PedReaderTest pairs the text of a PED file with the list of samples
 * that parsing that text is expected to produce.
 *
 * @author Mark DePristo
 * @since 2011
 */
public class PedReaderUnitTest extends BaseTest {
    private static Logger logger = Logger.getLogger(PedReaderUnitTest.class);

    /** A single test case: PED text plus the samples expected from parsing it. */
    private class PedReaderTest extends TestDataProvider {
        public String fileContents;
        public List<Sample> expectedSamples;

        private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
            super(PedReaderTest.class, name);
            this.fileContents = fileContents;
            this.expectedSamples = expectedSamples;
        }
    }

    // Family ID
    // Individual ID
    // Paternal ID
    // Maternal ID
    // Sex (1=male; 2=female; other=unknown)
    // Phenotype
    //
    // -9 missing
    // 0 missing
    // 1 unaffected
    // 2 affected

    /**
     * Registers all PED test cases and returns them in TestNG data-provider form.
     *
     * @return one row per registered PedReaderTest
     */
    @DataProvider(name = "readerTest")
    public Object[][] createPEDFiles() {
        new PedReaderTest("singleRecordMale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");

        new PedReaderTest("singleRecordFemale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 2 1");

        new PedReaderTest("singleRecordMissingGender",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.UNKNOWN, Affection.UNKNOWN)),
                "fam1 kid 0 0 0 0");

        // Affection
        new PedReaderTest("singleRecordAffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)),
                "fam1 kid 0 0 1 2");

        new PedReaderTest("singleRecordUnaffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");

        new PedReaderTest("singleRecordMissingAffection-9",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 -9");

        new PedReaderTest("singleRecordMissingAffection0",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 0");

        // Expected statuses follow the PED text: phenotype 1 = unaffected, 2 = affected
        new PedReaderTest("multipleUnrelated",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 2"));

        new PedReaderTest("explicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s\n%s\n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1",
                        "fam1 mom 0 0 2 2"));

        // The kid is expected to be AFFECTED, so the phenotype column must be 2
        new PedReaderTest("implicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                "fam1 kid dad mom 1 2");

        new PedReaderTest("partialTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s\n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1"));

        new PedReaderTest("bigPedigree",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
                        new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
                        new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s\n%s\n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad granddad1 grandma1 1 1",
                        "fam1 mom granddad2 grandma2 2 2"));

        // Quantitative trait
        new PedReaderTest("QuantitativeTrait",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10.0"));

        new PedReaderTest("QuantitativeTraitWithMissing",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 -9",
                        "fam2 s2 0 0 2 10.0"));

        new PedReaderTest("QuantitativeTraitOnlyInts",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)),
                String.format("%s\n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10"));

        return PedReaderTest.getTests(PedReaderTest.class);
    }

    /**
     * Parses myFileContents with a fresh PedReader and SampleDataSource and
     * checks the parsed samples against the expectation stored in test.
     *
     * @param test           the test case supplying the expected samples
     * @param myFileContents the PED text to parse (may differ from test.fileContents)
     * @param missing        the PED fields declared absent from the text
     */
    private static final void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedFields> missing) {
        logger.warn("Test " + test);
        PedReader reader = new PedReader();
        SampleDataSource sampleDB = new SampleDataSource();
        List<Sample> readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB);
        // TestNG's assertEquals takes (actual, expected, message)
        Assert.assertEquals(readSamples, test.expectedSamples, "Parsed samples differ from expected samples");
    }

    /** Parses each test case's PED text as-is, with no fields declared missing. */
    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReader(PedReaderTest test) {
        runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
    }

    /** Parses each test case's PED text with a leading comment line prepended. */
    @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
    public void testPedReaderWithComments(PedReaderTest test) {
        runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
    }

    /** Placeholder for missing-field coverage; currently performs no checks. */
    @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader")
    public void testPedReaderWithMissing(PedReaderTest test) {
        // todo -- test MISSING by splicing strings
        //runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class));
    }
}

View File

@ -22,17 +22,17 @@ public class SampleUnitTest extends BaseTest {
public void init() {
db = new SampleDataSource();
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Sample.Gender.UNKNOWN);
fam1B = new Sample("1B", db, "fam1", null, null, Sample.Gender.MALE);
fam1C = new Sample("1C", db, "fam1", null, null, Sample.Gender.FEMALE);
fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);
fam1C = new Sample("1C", db, "fam1", null, null, Gender.FEMALE);
s1 = new Sample("s1", db);
s2 = new Sample("s2", db);
trait1 = new Sample("t1", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.AFFECTED);
trait2 = new Sample("t2", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNAFFECTED);
trait3 = new Sample("t3", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNKNOWN);
trait4 = new Sample("t4", db, 1.0, Sample.Affection.QUANTITATIVE);
trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT);
trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT);
trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT);
trait4 = new Sample("t4", db, Affection.QUANTITATIVE, 1.0);
}
/**
@ -47,8 +47,8 @@ public class SampleUnitTest extends BaseTest {
@Test()
public void testGenders() {
Assert.assertTrue(fam1A.getGender() == Sample.Gender.UNKNOWN);
Assert.assertTrue(fam1B.getGender() == Sample.Gender.MALE);
Assert.assertTrue(fam1C.getGender() == Sample.Gender.FEMALE);
Assert.assertTrue(fam1A.getGender() == Gender.UNKNOWN);
Assert.assertTrue(fam1B.getGender() == Gender.MALE);
Assert.assertTrue(fam1C.getGender() == Gender.FEMALE);
}
}