From 84160bd83fd92cc6f89f715f41976d2d1512cfb0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 30 Sep 2011 15:50:54 -0400 Subject: [PATCH] Reorganization of Sample -- Moved Gender and Affection to separate public enums -- PedReader 90% implemented -- Improve interface cleanup to XReadLines and UserException --- .../sting/gatk/samples/Affection.java | 46 ++++ .../sting/gatk/samples/Gender.java | 34 +++ .../sting/gatk/samples/PedReader.java | 101 ++++----- .../sting/gatk/samples/Sample.java | 76 +++---- .../sting/gatk/samples/SampleDataSource.java | 24 ++- .../beagle/ProduceBeagleInputWalker.java | 4 +- .../gatk/walkers/qc/CountMalesWalker.java | 3 +- .../sting/utils/exceptions/UserException.java | 4 + .../sting/utils/text/XReadLines.java | 6 +- .../reads/DownsamplerBenchmark.java | 3 +- .../sting/gatk/samples/PedReaderUnitTest.java | 201 ++++++++++++++++++ .../sting/gatk/samples/SampleUnitTest.java | 20 +- 12 files changed, 386 insertions(+), 136 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/samples/Affection.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/samples/Gender.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Affection.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Affection.java new file mode 100644 index 000000000..de0dba884 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Affection.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the 
+ * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.samples; + +/** + * Categorical sample trait for association and analysis + * + * Samples can have unknown status, be affected or unaffected by the + * categorical trait, or they can be marked as actually having a + * quantitative trait value (stored in an associated value in the Sample class) + * + * @author Mark DePristo + * @since Sept. 
2011 + */ +public enum Affection { + /** Status is unknown */ + UNKNOWN, + /** Suffers from the disease */ + AFFECTED, + /** Unaffected by the disease */ + UNAFFECTED, + /** A quantitative trait: value of the trait is stored elsewhere */ + QUANTITATIVE +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Gender.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Gender.java new file mode 100644 index 000000000..6fb44804a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Gender.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.samples; + +/** +* ENUM of possible human genders: male, female, or unknown +*/ +public enum Gender { + MALE, + FEMALE, + UNKNOWN +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java index 6514cffe4..added09b6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/PedReader.java @@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.Reader; import java.util.*; /** @@ -115,10 +117,6 @@ public class PedReader { final static private Set CATAGORICAL_TRAIT_VALUES = new HashSet(Arrays.asList("-9", "0", "1", "2")); final static private String commentMarker = "#"; - private final File source; - private final List records; - - public enum MissingPedFields { NO_FAMILY_ID, NO_PARENTS, @@ -127,8 +125,8 @@ public class PedReader { } // phenotype - private final static String PHENOTYPE_MISSING_VALUE = "-9"; - private final static String PHENOTYPE_MISSING_VALUE_SECONDARY = "0"; + private final static String MISSING_VALUE1 = "-9"; + private final static String MISSING_VALUE2 = "0"; private final static String PHENOTYPE_UNAFFECTED = "1"; private final static String PHENOTYPE_AFFECTED = "2"; @@ -137,14 +135,15 @@ public class PedReader { private final static String SEX_FEMALE = "2"; // other=unknown - public PedReader(File source, EnumSet missingFields) throws FileNotFoundException { - this.source = source; - List lines = new XReadLines(source).readLines(); - this.records = parsePedLines(lines, missingFields); + public PedReader() { } + + public final List parse(File source, EnumSet missingFields, SampleDataSource sampleDB) throws FileNotFoundException { + logger.info("Reading PED file " + source + " with missing fields: " + 
missingFields); + return parse(new FileReader(source), missingFields, sampleDB); } - private final List parsePedLines(final List lines, EnumSet missingFields) { - logger.info("Reading PED file " + source + " with missing fields: " + missingFields); + public final List parse(Reader reader, EnumSet missingFields, SampleDataSource sampleDB) { + final List lines = new XReadLines(reader).readLines(); // What are the record offsets? final int familyPos = missingFields.contains(MissingPedFields.NO_FAMILY_ID) ? -1 : 0; @@ -153,7 +152,7 @@ public class PedReader { final int maternalPos = missingFields.contains(MissingPedFields.NO_PARENTS) ? -1 : paternalPos + 1; final int sexPos = missingFields.contains(MissingPedFields.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1; final int phenotypePos = missingFields.contains(MissingPedFields.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1; - final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)); + final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1; // go through once and determine properties int lineNo = 1; @@ -164,7 +163,7 @@ public class PedReader { String[] parts = line.split("\\W+"); if ( parts.length != nExpectedFields ) - throw new UserException.MalformedFile(source, "Bad PED line " + lineNo + ": wrong number of fields"); + throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields"); if ( phenotypePos != -1 ) { isQT = isQT || CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]); @@ -177,75 +176,55 @@ public class PedReader { // now go through and parse each record lineNo = 1; - final List recs = new ArrayList(splits.size()); + final List samples = new ArrayList(splits.size()); for ( final String[] parts : splits ) { String familyID = null, individualID, paternalID = null, maternalID = null; - 
Sample.Gender sex = Sample.Gender.UNKNOWN; - double quantitativePhenotype = Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE; - Sample.Affection affection = Sample.Affection.UNKNOWN; + Gender sex = Gender.UNKNOWN; + double quantitativePhenotype = Sample.UNSET_QT; + Affection affection = Affection.UNKNOWN; - if ( familyPos != -1 ) familyID = parts[familyPos]; + if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]); individualID = parts[samplePos]; - if ( paternalPos != -1 ) paternalID = parts[paternalPos]; - if ( maternalPos != -1 ) maternalID = parts[maternalPos]; + if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]); + if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]); if ( sexPos != -1 ) { - if ( parts[sexPos].equals(SEX_MALE) ) sex = Sample.Gender.MALE; - else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Sample.Gender.FEMALE; - else sex = Sample.Gender.UNKNOWN; + if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE; + else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE; + else sex = Gender.UNKNOWN; } if ( phenotypePos != -1 ) { if ( isQT ) { - if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) ) - affection = Sample.Affection.UNKNOWN; + if ( parts[phenotypePos].equals(MISSING_VALUE1) ) + affection = Affection.UNKNOWN; else { - affection = Sample.Affection.QUANTITATIVE; + affection = Affection.QUANTITATIVE; quantitativePhenotype = Double.valueOf(parts[phenotypePos]); } } else { - if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE) ) affection = Sample.Affection.UNKNOWN; - else if ( parts[phenotypePos].equals(PHENOTYPE_MISSING_VALUE_SECONDARY) ) affection = Sample.Affection.UNKNOWN; - else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Sample.Affection.UNAFFECTED; - else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Sample.Affection.AFFECTED; + if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN; + else if ( 
parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN; + else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED; + else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED; else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo); } } - recs.add(new PedRecord(familyID, individualID, paternalID, maternalID, sex, quantitativePhenotype, affection)); - + final Sample s = new Sample(familyID, sampleDB, individualID, paternalID, maternalID, sex, affection, quantitativePhenotype); + samples.add(s); + sampleDB.addSample(s); lineNo++; } - return Collections.unmodifiableList(recs); + sampleDB.validate(samples); + return samples; } - public List getRecords() { - return records; - } - - public void fillSampleDB(SampleDataSource db) { - for ( final PedRecord rec : getRecords() ) { - } - } -} - -class PedRecord { - final String familyID, individualID, paternalID, maternalID; - final Sample.Gender sex; - final double quantitativePhenotype; - final Sample.Affection affection; - - PedRecord(final String familyID, final String individualID, - final String paternalID, final String maternalID, - final Sample.Gender sex, - final double quantitativePhenotype, final Sample.Affection affection) { - this.familyID = familyID; - this.individualID = individualID; - this.paternalID = paternalID; - this.maternalID = maternalID; - this.sex = sex; - this.quantitativePhenotype = quantitativePhenotype; - this.affection = affection; + private final static String maybeMissing(final String string) { + if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) ) + return null; + else + return string; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java index db905a16e..3426cf678 100644 --- 
a/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/Sample.java @@ -10,37 +10,18 @@ import java.util.Map; */ public class Sample implements java.io.Serializable { final private String familyID, paternalID, maternalID; - final private Sample.Gender gender; + final private Gender gender; final private double quantitativePhenotype; - final private Sample.Affection affection; + final private Affection affection; final private String ID; final private SampleDataSource dataSource; + final private Map properties = new HashMap(); - // todo -- conditionally add the property map -- should be empty by default - private final Map properties = new HashMap(); - - public enum Gender { - MALE, - FEMALE, - UNKNOWN - } - - public enum Affection { - /** Status is unknown */ - UNKNOWN, - /** Suffers from the disease */ - AFFECTED, - /** Unaffected by the disease */ - UNAFFECTED, - /** A quantitative trait: value of the trait is stored elsewhere */ - QUANTITATIVE - } - - public final static double UNSET_QUANTITIATIVE_TRAIT_VALUE = Double.NaN; + public final static double UNSET_QT = Double.NaN; public Sample(final String ID, final SampleDataSource dataSource, final String familyID, final String paternalID, final String maternalID, - final Gender gender, final double quantitativePhenotype, final Affection affection) { + final Gender gender, final Affection affection, final double quantitativePhenotype) { this.familyID = familyID; this.paternalID = paternalID; this.maternalID = maternalID; @@ -51,20 +32,31 @@ public class Sample implements java.io.Serializable { this.dataSource = dataSource; } - public Sample(final String ID, final SampleDataSource dataSource, - final String familyID, final String paternalID, final String maternalID, final Gender gender) { - this(ID, dataSource, familyID, paternalID, maternalID, gender, - UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN); + protected Sample(final String ID, + 
final String familyID, final String paternalID, final String maternalID, + final Gender gender, final Affection affection, final double quantitativePhenotype) { + this(ID, null, familyID, paternalID, maternalID, gender, affection, quantitativePhenotype); } - public Sample(final String ID, final SampleDataSource dataSource, final double quantitativePhenotype, final Affection affection) { - this(ID, dataSource, null, null, null, Gender.UNKNOWN, quantitativePhenotype, affection); + protected Sample(final String ID, + final String familyID, final String paternalID, final String maternalID, + final Gender gender, final Affection affection) { + this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT); + } + + + public Sample(final String ID, final SampleDataSource dataSource, + final String familyID, final String paternalID, final String maternalID, final Gender gender) { + this(ID, dataSource, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT); + } + + public Sample(final String ID, final SampleDataSource dataSource, final Affection affection, final double quantitativePhenotype) { + this(ID, dataSource, null, null, null, Gender.UNKNOWN, affection, quantitativePhenotype); } public Sample(String id, SampleDataSource dataSource) { - this(id, dataSource, - null, null, null, - Gender.UNKNOWN, UNSET_QUANTITIATIVE_TRAIT_VALUE, Affection.UNKNOWN); + this(id, dataSource, null, null, null, + Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT); } // ------------------------------------------------------------------------------------- @@ -77,7 +69,6 @@ public class Sample implements java.io.Serializable { return ID; } - public String getFamilyID() { return familyID; } @@ -157,21 +148,4 @@ public class Sample implements java.io.Serializable { public boolean hasExtraProperty(String key) { return properties.containsKey(key); } - -// @Override -// public boolean equals(Object o) { -// if (this == o) return true; -// if (o == null || getClass() != 
o.getClass()) return false; -// -// Sample sample = (Sample) o; -// if (ID != null ? !ID.equals(sample.ID) : sample.ID != null) return false; -// if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false; -// -// return true; -// } -// -// @Override -// public int hashCode() { -// return ID != null ? ID.hashCode() : "".hashCode(); -// } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java index d5285271b..e0d159947 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDataSource.java @@ -55,7 +55,7 @@ public class SampleDataSource { /** * Hallucinates sample objects for all the samples in the SAM file and stores them */ - public SampleDataSource addSamples(SAMFileHeader header) { + protected SampleDataSource addSamples(SAMFileHeader header) { for (String sampleName : SampleUtils.getSAMFileSamples(header)) { if (getSample(sampleName) == null) { Sample newSample = new Sample(sampleName, this); @@ -65,7 +65,7 @@ public class SampleDataSource { return this; } - public SampleDataSource addSamples(final List sampleFiles) { + protected SampleDataSource addSamples(final List sampleFiles) { // add files consecutively for (File file : sampleFiles) { addSamples(file); @@ -77,7 +77,7 @@ public class SampleDataSource { * Parse one sample file and integrate it with samples that are already there * Fail quickly if we find any errors in the file */ - public SampleDataSource addSamples(File sampleFile) { + protected SampleDataSource addSamples(File sampleFile) { return this; } @@ -85,7 +85,7 @@ public class SampleDataSource { * Add a sample to the collection * @param sample to be added */ - private SampleDataSource addSample(Sample sample) { + protected SampleDataSource addSample(Sample sample) { 
samples.put(sample.getID(), sample); return this; } @@ -138,8 +138,6 @@ public class SampleDataSource { // // -------------------------------------------------------------------------------- - - /** * Get number of sample objects * @return size of samples map @@ -209,4 +207,18 @@ public class SampleDataSource { } return samples; } + + // -------------------------------------------------------------------------------- + // + // Validation + // + // -------------------------------------------------------------------------------- + + public final void validate() { + validate(getSamples()); + } + + public final void validate(Collection samplesToCheck) { + + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index cdf1913f7..b722220f9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.gatk.samples.Gender; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; import org.broadinstitute.sting.utils.GenomeLoc; @@ -248,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker { Map preferredGenotypes = preferredVC.getGenotypes(); Map otherGenotypes = goodSite(otherVC) ? 
otherVC.getGenotypes() : null; for ( String sample : samples ) { - boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Sample.Gender.MALE; + boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE; Genotype genotype; boolean isValidation; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java index d776fe415..24c06d101 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountMalesWalker.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.Gender; import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -41,7 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires; public class CountMalesWalker extends ReadWalker { public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { Sample sample = getSampleDB().getSample(read); - return sample.getGender() == Sample.Gender.MALE ? 1 : 0; + return sample.getGender() == Gender.MALE ? 
1 : 0; } public Integer reduceInit() { return 0; } diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 70f7387f4..77f1ed6c0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -215,6 +215,10 @@ public class UserException extends ReviewedStingException { super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage())); } + public MalformedFile(String name, String message) { + super(String.format("File associated with name %s is malformed: %s", name, message)); + } + public MalformedFile(String name, String message, Exception e) { super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage())); } diff --git a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java index 52b6f3b01..49e9ddf52 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/XReadLines.java @@ -99,9 +99,9 @@ public class XReadLines implements Iterator, Iterable { * * @param reader */ - public XReadLines(final BufferedReader reader, final boolean trimWhitespace) { + public XReadLines(final Reader reader, final boolean trimWhitespace) { try { - this.in = reader; + this.in = new BufferedReader(reader); nextline = readNextLine(); this.trimWhitespace = trimWhitespace; } catch(IOException e) { @@ -109,7 +109,7 @@ public class XReadLines implements Iterator, Iterable { } } - public XReadLines(final BufferedReader reader) throws FileNotFoundException { + public XReadLines(final Reader reader) { this(reader, true); } diff --git 
a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index 41d7a23c6..0d5734d43 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; +import java.io.File; import java.util.Collections; import java.util.Iterator; @@ -85,8 +86,6 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { (byte)0); GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); - SampleDataSource sampleDataSource = new SampleDataSource().addSamples(reader.getFileHeader()); - // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out? 
Iterator readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter()); LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java new file mode 100644 index 000000000..1cad634dd --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/PedReaderUnitTest.java @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.samples; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.StringReader; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; + +/** + * UnitTest for PedReader + * + * @author Mark DePristo + * @since 2011 + */ +public class PedReaderUnitTest extends BaseTest { + private static Logger logger = Logger.getLogger(PedReaderUnitTest.class); + + private class PedReaderTest extends TestDataProvider { + public String fileContents; + public List expectedSamples; + + private PedReaderTest(final String name, final List expectedSamples, final String fileContents) { + super(PedReaderTest.class, name); + this.fileContents = fileContents; + this.expectedSamples = expectedSamples; + } + } + +// Family ID +// Individual ID +// Paternal ID +// Maternal ID +// Sex (1=male; 2=female; other=unknown) +// Phenotype +// +// -9 missing +// 0 missing +// 1 unaffected +// 2 affected + + @DataProvider(name = "readerTest") + public Object[][] createPEDFiles() { + new PedReaderTest("singleRecordMale", + Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)), + "fam1 kid 0 0 1 1"); + + new PedReaderTest("singleRecordFemale", + Arrays.asList(new Sample("kid", "fam1", null, null, Gender.FEMALE, Affection.UNAFFECTED)), + "fam1 kid 0 0 2 1"); + + new PedReaderTest("singleRecordMissingGender", + Arrays.asList(new Sample("kid", "fam1", null, null, Gender.UNKNOWN, Affection.UNKNOWN)), + "fam1 kid 0 0 0 0"); + + // Affection + new PedReaderTest("singleRecordAffected", + Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)), + "fam1 kid 0 0 1 2"); + + new PedReaderTest("singleRecordUnaffected", + Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)), + "fam1 kid 0 0 1 1"); + + new 
PedReaderTest("singleRecordMissingAffection-9", + Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)), + "fam1 kid 0 0 1 -9"); + + new PedReaderTest("singleRecordMissingAffection0", + Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)), + "fam1 kid 0 0 1 0"); + + new PedReaderTest("multipleUnrelated", + Arrays.asList( + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.AFFECTED), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.UNAFFECTED)), + String.format("%s\n%s", + "fam1 s1 0 0 1 1", + "fam2 s2 0 0 2 2")); + + new PedReaderTest("explicitTrio", + Arrays.asList( + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), + new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)), + String.format("%s\n%s\n%s", + "fam1 kid dad mom 1 2", + "fam1 dad 0 0 1 1", + "fam1 mom 0 0 2 2")); + + new PedReaderTest("implicitTrio", + Arrays.asList( + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), + new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), + "fam1 kid dad mom 1 1"); + + new PedReaderTest("partialTrio", + Arrays.asList( + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), + new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), + String.format("%s\n%s", + "fam1 kid dad mom 1 2", + "fam1 dad 0 0 1 1")); + + new PedReaderTest("bigPedigree", + Arrays.asList( + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED), + new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), + new Sample("grandma1", "fam1", null, 
null, Gender.FEMALE, Affection.UNKNOWN), + new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED), + new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN), + new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)), + String.format("%s\n%s\n%s", + "fam1 kid dad mom 1 2", + "fam1 dad granddad1 grandma1 1 1", + "fam1 mom granddad2 grandma2 2 2")); + + // Quantitative trait + new PedReaderTest("QuantitativeTrait", + Arrays.asList( + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), + String.format("%s\n%s", + "fam1 s1 0 0 1 1", + "fam2 s2 0 0 2 10.0")); + + new PedReaderTest("QuantitativeTraitWithMissing", + Arrays.asList( + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), + String.format("%s\n%s", + "fam1 s1 0 0 1 -9", + "fam2 s2 0 0 2 10.0")); + + new PedReaderTest("QuantitativeTraitOnlyInts", + Arrays.asList( + new Sample("s1", "fam1", null, null, Gender.MALE, Affection.QUANTITATIVE, 1.0), + new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.QUANTITATIVE, 10.0)), + String.format("%s\n%s", + "fam1 s1 0 0 1 1", + "fam2 s2 0 0 2 10")); + + return PedReaderTest.getTests(PedReaderTest.class); + } + + private static final void runTest(PedReaderTest test, String myFileContents, EnumSet missing) { + logger.warn("Test " + test); + PedReader reader = new PedReader(); + SampleDataSource sampleDB = new SampleDataSource(); + List readSamples = reader.parse(new StringReader(myFileContents), missing, sampleDB); + Assert.assertEquals(test.expectedSamples, readSamples, "Parsed incorrect number of samples"); + } + + @Test(enabled = true, dataProvider = "readerTest") + public void testPedReader(PedReaderTest test) { + runTest(test, test.fileContents, 
EnumSet.noneOf(PedReader.MissingPedFields.class)); + } + + @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") + public void testPedReaderWithComments(PedReaderTest test) { + runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); + } + + @Test(enabled = true, dataProvider = "readerTest", dependsOnMethods = "testPedReader") + public void testPedReaderWithMissing(PedReaderTest test) { + // todo -- test MISSING by splicing strings + //runTest(test, "#comment\n" + test.fileContents, EnumSet.noneOf(PedReader.MissingPedFields.class)); + } + +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java index e8d1772b8..279319edb 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleUnitTest.java @@ -22,17 +22,17 @@ public class SampleUnitTest extends BaseTest { public void init() { db = new SampleDataSource(); - fam1A = new Sample("1A", db, "fam1", "1B", "1C", Sample.Gender.UNKNOWN); - fam1B = new Sample("1B", db, "fam1", null, null, Sample.Gender.MALE); - fam1C = new Sample("1C", db, "fam1", null, null, Sample.Gender.FEMALE); + fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN); + fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE); + fam1C = new Sample("1C", db, "fam1", null, null, Gender.FEMALE); s1 = new Sample("s1", db); s2 = new Sample("s2", db); - trait1 = new Sample("t1", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.AFFECTED); - trait2 = new Sample("t2", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNAFFECTED); - trait3 = new Sample("t3", db, Sample.UNSET_QUANTITIATIVE_TRAIT_VALUE, Sample.Affection.UNKNOWN); - trait4 = new Sample("t4", db, 1.0, Sample.Affection.QUANTITATIVE); + trait1 = new 
Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT); + trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT); + trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT); + trait4 = new Sample("t4", db, Affection.QUANTITATIVE, 1.0); } /** @@ -47,8 +47,8 @@ public class SampleUnitTest extends BaseTest { @Test() public void testGenders() { - Assert.assertTrue(fam1A.getGender() == Sample.Gender.UNKNOWN); - Assert.assertTrue(fam1B.getGender() == Sample.Gender.MALE); - Assert.assertTrue(fam1C.getGender() == Sample.Gender.FEMALE); + Assert.assertTrue(fam1A.getGender() == Gender.UNKNOWN); + Assert.assertTrue(fam1B.getGender() == Gender.MALE); + Assert.assertTrue(fam1C.getGender() == Gender.FEMALE); } }